1""" 

2Dogleg algorithm with rectangular trust regions for least-squares minimization. 

3 

4The description of the algorithm can be found in [Voglis]_. The algorithm does 

5trust-region iterations, but the shape of trust regions is rectangular as 

6opposed to conventional elliptical. The intersection of a trust region and 

7an initial feasible region is again some rectangle. Thus, on each iteration a 

8bound-constrained quadratic optimization problem is solved. 

9 

10A quadratic problem is solved by well-known dogleg approach, where the 

11function is minimized along piecewise-linear "dogleg" path [NumOpt]_, 

12Chapter 4. If Jacobian is not rank-deficient then the function is decreasing 

13along this path, and optimization amounts to simply following along this 

14path as long as a point stays within the bounds. A constrained Cauchy step 

15(along the anti-gradient) is considered for safety in rank deficient cases, 

16in this situations the convergence might be slow. 

17 

18If during iterations some variable hit the initial bound and the component 

19of anti-gradient points outside the feasible region, then a next dogleg step 

20won't make any progress. At this state such variables satisfy first-order 

21optimality conditions and they are excluded before computing a next dogleg 

22step. 

23 

24Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for dense 

25Jacobian matrices) or by iterative procedure `scipy.sparse.linalg.lsmr` (for 

26dense and sparse matrices, or Jacobian being LinearOperator). The second 

27option allows to solve very large problems (up to couple of millions of 

28residuals on a regular PC), provided the Jacobian matrix is sufficiently 

29sparse. But note that dogbox is not very good for solving problems with 

30large number of constraints, because of variables exclusion-inclusion on each 

31iteration (a required number of function evaluations might be high or accuracy 

32of a solution will be poor), thus its large-scale usage is probably limited 

33to unconstrained problems. 

34 

35References 

36---------- 

37.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg 

38 Approach for Unconstrained and Bound Constrained Nonlinear 

39 Optimization", WSEAS International Conference on Applied 

40 Mathematics, Corfu, Greece, 2004. 

41.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, 2nd edition". 
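
Example
-------
As an illustrative sketch (the residual function and bounds below are
arbitrary and not part of this module), the solver is normally reached
through the public `scipy.optimize.least_squares` interface by passing
``method='dogbox'``::

    import numpy as np
    from scipy.optimize import least_squares

    def fun_rosenbrock(x):
        # Residuals whose squared sum is the Rosenbrock function.
        return np.array([10 * (x[1] - x[0] ** 2), 1 - x[0]])

    # Require x[1] >= 1.5, which makes the unconstrained minimum [1, 1]
    # infeasible, so the solver stops at a point on that bound.
    res = least_squares(fun_rosenbrock, np.array([2.0, 2.0]),
                        bounds=([-np.inf, 1.5], np.inf),
                        method='dogbox')
    # res.x should be close to [1.22, 1.5].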

42""" 

import numpy as np
from numpy.linalg import lstsq, norm

from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr
from scipy.optimize import OptimizeResult

from .common import (
    step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,
    build_quadratic_1d, minimize_quadratic_1d, compute_grad,
    compute_jac_scale, check_termination, scale_for_robust_loss_function,
    print_header_nonlinear, print_iteration_nonlinear)


def lsmr_operator(Jop, d, active_set):
    """Compute a LinearOperator to use in LSMR by the dogbox algorithm.

    The `active_set` mask is used to exclude active variables from the
    matrix-vector products.
    """
    m, n = Jop.shape

    def matvec(x):
        x_free = x.ravel().copy()
        x_free[active_set] = 0
        return Jop.matvec(x_free * d)

    def rmatvec(x):
        r = d * Jop.rmatvec(x)
        r[active_set] = 0
        return r

    return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)

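# Illustrative only (not used by the algorithm): the operator built by
# `lsmr_operator` should behave like the matrix J * diag(d) with the columns
# of active variables zeroed out. The hypothetical helper below checks that
# equivalence for a small dense J.
def _lsmr_operator_example():
    rng = np.random.default_rng(0)
    J = rng.standard_normal((5, 3))
    d = np.array([1.0, 2.0, 0.5])
    active_set = np.array([False, True, False])

    op = lsmr_operator(aslinearoperator(J), d, active_set)

    # Reference matrix: scale the columns by d and zero out the active one.
    Jd_masked = J * d
    Jd_masked[:, active_set] = 0

    x = rng.standard_normal(3)
    y = rng.standard_normal(5)
    assert np.allclose(op.matvec(x), Jd_masked.dot(x))
    assert np.allclose(op.rmatvec(y), Jd_masked.T.dot(y))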

def find_intersection(x, tr_bounds, lb, ub):
    """Find intersection of trust-region bounds and initial bounds.

    Returns
    -------
    lb_total, ub_total : ndarray with shape of x
        Lower and upper bounds of the intersection region.
    orig_l, orig_u : ndarray of bool with shape of x
        True means that an original bound is taken as a corresponding bound
        in the intersection region.
    tr_l, tr_u : ndarray of bool with shape of x
        True means that a trust-region bound is taken as a corresponding bound
        in the intersection region.
    """
    lb_centered = lb - x
    ub_centered = ub - x

    lb_total = np.maximum(lb_centered, -tr_bounds)
    ub_total = np.minimum(ub_centered, tr_bounds)

    orig_l = np.equal(lb_total, lb_centered)
    orig_u = np.equal(ub_total, ub_centered)

    tr_l = np.equal(lb_total, -tr_bounds)
    tr_u = np.equal(ub_total, tr_bounds)

    return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u

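# Illustrative only (not used by the algorithm): hypothetical numbers chosen
# so that one bound of each kind (original and trust-region) is the binding
# one in each direction.
def _find_intersection_example():
    x = np.array([0.0, 0.0])
    tr_bounds = np.array([2.0, 2.0])     # trust-region half-widths
    lb = np.array([-1.0, -3.0])
    ub = np.array([5.0, 1.0])

    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
        x, tr_bounds, lb, ub)
    # lb_total == [-1, -2]: the first component is limited by the original
    # lower bound (orig_l == [True, False]), the second by the trust region
    # (tr_l == [False, True]).
    # ub_total == [2, 1]: the first component is limited by the trust region
    # (tr_u == [True, False]), the second by the original upper bound
    # (orig_u == [False, True]).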

def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
    """Find dogleg step in a rectangular region.

    Returns
    -------
    step : ndarray, shape (n,)
        Computed dogleg step.
    bound_hits : ndarray of int, shape (n,)
        Each component shows whether a corresponding variable hits the
        initial bound after the step is taken:
            *  0 - a variable doesn't hit the bound.
            * -1 - lower bound is hit.
            *  1 - upper bound is hit.
    tr_hit : bool
        Whether the step hit the boundary of the trust region.
    """
    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
        x, tr_bounds, lb, ub
    )
    bound_hits = np.zeros_like(x, dtype=int)

    if in_bounds(newton_step, lb_total, ub_total):
        return newton_step, bound_hits, False

    to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)

    # The classical dogleg algorithm would check whether the Cauchy step fits
    # into the bounds, and just return its constrained version if not. But in
    # a rectangular trust region it makes sense to try to improve the
    # constrained Cauchy step too. Thus, we don't distinguish these two cases.

    cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g

    step_diff = newton_step - cauchy_step
    step_size, hits = step_size_to_bound(cauchy_step, step_diff,
                                         lb_total, ub_total)
    bound_hits[(hits < 0) & orig_l] = -1
    bound_hits[(hits > 0) & orig_u] = 1
    tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)

    return cauchy_step + step_size * step_diff, bound_hits, tr_hit

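# Illustrative only (not used by the algorithm): a hypothetical 2-variable
# linear least-squares problem showing how the dogleg step is cut short when
# the Gauss-Newton step leaves the feasible rectangle. `build_quadratic_1d`
# is the helper from .common, used here the same way as in `dogbox` below.
def _dogleg_step_example():
    J = np.eye(2)
    f = np.array([-3.0, -4.0])          # residuals at x = 0
    g = J.T.dot(f)                      # gradient of 0.5 * ||f||^2
    newton_step = lstsq(J, -f, rcond=-1)[0]    # full Gauss-Newton step [3, 4]
    a, b = build_quadratic_1d(J, g, -g)        # model along the anti-gradient

    x = np.zeros(2)
    lb = np.array([-np.inf, -np.inf])
    ub = np.array([np.inf, 2.0])        # upper bound clips the second variable
    tr_bounds = np.array([10.0, 10.0])  # large trust region, inactive here

    step, bound_hits, tr_hit = dogleg_step(x, newton_step, g, a, b,
                                           tr_bounds, lb, ub)
    # The Newton step [3, 4] violates ub, so the returned step stops on the
    # bound: here it should equal the constrained Cauchy point [1.5, 2.0],
    # with bound_hits == [0, 1] (second variable hits its upper bound) and
    # tr_hit == False.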

def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
           loss_function, tr_solver, tr_options, verbose):
    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        # A variable is active if it sits on a bound and the anti-gradient
        # points outside the feasible region.
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        g_full = g.copy()
        g[active_set] = 0

        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_free = scale[free_set]

        # Compute the (Gauss-)Newton step and build a quadratic model for
        # the Cauchy step.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f, rcond=-1)[0]

            # Coefficients of the quadratic model along the anti-gradient.
            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # We compute the lsmr step in scaled variables and then transform
            # it back to normal variables. If lsmr gave the exact
            # least-squares solution, this would be equivalent to not doing
            # any transformation, but from experience it works better this
            # way.

            # We pass active_set to make the computations behave as if we
            # selected the free subset of J columns, but without actually
            # doing any slicing, which is expensive for sparse matrices and
            # impossible for a LinearOperator.

            lsmr_op = lsmr_operator(Jop, scale, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_free

            # Components of g for active variables were zeroed, so this call
            # is correct and equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(J_free, g_free,
                                                          step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            x_new = x + step
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale_inv, ord=np.inf)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, tr_hit
            )

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            on_bound[free_set] = on_bound_free

            x = x_new
            # Set variables exactly at the boundary.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
        active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)