"""Generic interface for least-squares minimization."""
from warnings import warn

import numpy as np
from numpy.linalg import norm

from scipy.sparse import issparse, csr_matrix
from scipy.sparse.linalg import LinearOperator
from scipy.optimize import _minpack, OptimizeResult
from scipy.optimize._numdiff import approx_derivative, group_columns

from .trf import trf
from .dogbox import dogbox
from .common import EPS, in_bounds, make_strictly_feasible


TERMINATION_MESSAGES = {
    -1: "Improper input parameters status returned from `leastsq`",
    0: "The maximum number of function evaluations is exceeded.",
    1: "`gtol` termination condition is satisfied.",
    2: "`ftol` termination condition is satisfied.",
    3: "`xtol` termination condition is satisfied.",
    4: "Both `ftol` and `xtol` termination conditions are satisfied."
}


FROM_MINPACK_TO_COMMON = {
    0: -1,  # Improper input parameters from MINPACK.
    1: 2,
    2: 3,
    3: 4,
    4: 1,
    5: 0
    # There are 6, 7, 8 for too small tolerance parameters,
    # but we guard against it by checking ftol, xtol, gtol beforehand.
}


def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step):
    n = x0.size

    if diff_step is None:
        epsfcn = EPS
    else:
        epsfcn = diff_step**2

    # Compute MINPACK's `diag`, which is inverse of our `x_scale` and
    # ``x_scale='jac'`` corresponds to ``diag=None``.
    if isinstance(x_scale, str) and x_scale == 'jac':
        diag = None
    else:
        diag = 1 / x_scale

    full_output = True
    col_deriv = False
    factor = 100.0

    if jac is None:
        if max_nfev is None:
            # n squared to account for Jacobian evaluations.
            max_nfev = 100 * n * (n + 1)
        x, info, status = _minpack._lmdif(
            fun, x0, (), full_output, ftol, xtol, gtol,
            max_nfev, epsfcn, factor, diag)
    else:
        if max_nfev is None:
            max_nfev = 100 * n
        x, info, status = _minpack._lmder(
            fun, jac, x0, (), full_output, col_deriv,
            ftol, xtol, gtol, max_nfev, factor, diag)

    f = info['fvec']

    if callable(jac):
        J = jac(x)
    else:
        J = np.atleast_2d(approx_derivative(fun, x))

    cost = 0.5 * np.dot(f, f)
    g = J.T.dot(f)
    g_norm = norm(g, ord=np.inf)

    nfev = info['nfev']
    njev = info.get('njev', None)

    status = FROM_MINPACK_TO_COMMON[status]
    active_mask = np.zeros_like(x0, dtype=int)

    return OptimizeResult(
        x=x, cost=cost, fun=f, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev, status=status)


def prepare_bounds(bounds, n):

    lb, ub = [np.asarray(b, dtype=float) for b in bounds]
    if lb.ndim == 0:
        lb = np.resize(lb, n)

    if ub.ndim == 0:
        ub = np.resize(ub, n)

    return lb, ub


def check_tolerance(ftol, xtol, gtol, method):
    def check(tol, name):
        if tol is None:
            tol = 0
        elif tol < EPS:
            warn("Setting `{}` below the machine epsilon ({:.2e}) effectively "
                 "disables the corresponding termination condition."
                 .format(name, EPS))
        return tol

    ftol = check(ftol, "ftol")
    xtol = check(xtol, "xtol")
    gtol = check(gtol, "gtol")

    if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS):
        raise ValueError("All tolerances must be higher than machine epsilon "
                         "({:.2e}) for method 'lm'.".format(EPS))
    elif ftol < EPS and xtol < EPS and gtol < EPS:
        raise ValueError("At least one of the tolerances must be higher than "
                         "machine epsilon ({:.2e}).".format(EPS))

    return ftol, xtol, gtol


def check_x_scale(x_scale, x0):
    if isinstance(x_scale, str) and x_scale == 'jac':
        return x_scale

    try:
        x_scale = np.asarray(x_scale, dtype=float)
        valid = np.all(np.isfinite(x_scale)) and np.all(x_scale > 0)
    except (ValueError, TypeError):
        valid = False

    if not valid:
        raise ValueError("`x_scale` must be 'jac' or array_like with "
                         "positive numbers.")

    if x_scale.ndim == 0:
        x_scale = np.resize(x_scale, x0.shape)

    if x_scale.shape != x0.shape:
        raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")

    return x_scale


def check_jac_sparsity(jac_sparsity, m, n):
    if jac_sparsity is None:
        return None

    if not issparse(jac_sparsity):
        jac_sparsity = np.atleast_2d(jac_sparsity)

    if jac_sparsity.shape != (m, n):
        raise ValueError("`jac_sparsity` has wrong shape.")

    return jac_sparsity, group_columns(jac_sparsity)


# Loss functions.


def huber(z, rho, cost_only):

    mask = z <= 1
    rho[0, mask] = z[mask]
    rho[0, ~mask] = 2 * z[~mask]**0.5 - 1
    if cost_only:
        return
    rho[1, mask] = 1
    rho[1, ~mask] = z[~mask]**-0.5
    rho[2, mask] = 0
    rho[2, ~mask] = -0.5 * z[~mask]**-1.5


def soft_l1(z, rho, cost_only):
    t = 1 + z
    rho[0] = 2 * (t**0.5 - 1)
    if cost_only:
        return
    rho[1] = t**-0.5
    rho[2] = -0.5 * t**-1.5


def cauchy(z, rho, cost_only):
    rho[0] = np.log1p(z)
    if cost_only:
        return
    t = 1 + z
    rho[1] = 1 / t
    rho[2] = -1 / t**2


def arctan(z, rho, cost_only):
    rho[0] = np.arctan(z)
    if cost_only:
        return
    t = 1 + z**2
    rho[1] = 1 / t
    rho[2] = -2 * z / t**2


IMPLEMENTED_LOSSES = dict(linear=None, huber=huber, soft_l1=soft_l1,
                          cauchy=cauchy, arctan=arctan)


def construct_loss_function(m, loss, f_scale):
    if loss == 'linear':
        return None

    if not callable(loss):
        loss = IMPLEMENTED_LOSSES[loss]
        rho = np.empty((3, m))

        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            loss(z, rho, cost_only=cost_only)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho
    else:
        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            rho = loss(z)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho

    return loss_function


def least_squares(

        fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf',
        ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear',
        f_scale=1.0, diff_step=None, tr_solver=None, tr_options={},
        jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={}):
    """Solve a nonlinear least-squares problem with bounds on the variables.

    Given the residuals f(x) (an m-D real function of n real
    variables) and the loss function rho(s) (a scalar function), `least_squares`
    finds a local minimum of the cost function F(x)::

        minimize F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1)
        subject to lb <= x <= ub

    The purpose of the loss function rho(s) is to reduce the influence of
    outliers on the solution.

    Parameters
    ----------
    fun : callable
        Function which computes the vector of residuals, with the signature
        ``fun(x, *args, **kwargs)``, i.e., the minimization proceeds with
        respect to its first argument. The argument ``x`` passed to this
        function is an ndarray of shape (n,) (never a scalar, even for n=1).
        It must allocate and return a 1-D array_like of shape (m,) or a scalar.
        If the argument ``x`` is complex or the function ``fun`` returns
        complex residuals, it must be wrapped in a real function of real
        arguments, as shown at the end of the Examples section.
    x0 : array_like with shape (n,) or float
        Initial guess on independent variables. If float, it will be treated
        as a 1-D array with one element.
    jac : {'2-point', '3-point', 'cs', callable}, optional
        Method of computing the Jacobian matrix (an m-by-n matrix, where
        element (i, j) is the partial derivative of f[i] with respect to
        x[j]). The keywords select a finite difference scheme for numerical
        estimation. The scheme '3-point' is more accurate, but requires
        twice as many operations as '2-point' (default). The scheme 'cs'
        uses complex steps, and while potentially the most accurate, it is
        applicable only when `fun` correctly handles complex inputs and
        can be analytically continued to the complex plane. Method 'lm'
        always uses the '2-point' scheme. If callable, it is used as
        ``jac(x, *args, **kwargs)`` and should return a good approximation
        (or the exact value) for the Jacobian as an array_like (np.atleast_2d
        is applied), a sparse matrix or a `scipy.sparse.linalg.LinearOperator`.
    bounds : 2-tuple of array_like, optional
        Lower and upper bounds on independent variables. Defaults to no bounds.
        Each array must match the size of `x0` or be a scalar; in the latter
        case a bound will be the same for all variables. Use ``np.inf`` with
        an appropriate sign to disable bounds on all or some variables.
    method : {'trf', 'dogbox', 'lm'}, optional
        Algorithm to perform minimization.

            * 'trf' : Trust Region Reflective algorithm, particularly suitable
              for large sparse problems with bounds. Generally robust method.
            * 'dogbox' : dogleg algorithm with rectangular trust regions,
              typical use case is small problems with bounds. Not recommended
              for problems with rank-deficient Jacobian.
            * 'lm' : Levenberg-Marquardt algorithm as implemented in MINPACK.
              Doesn't handle bounds and sparse Jacobians. Usually the most
              efficient method for small unconstrained problems.

        Default is 'trf'. See Notes for more information.
    ftol : float or None, optional
        Tolerance for termination by the change of the cost function. Default
        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
        and there was an adequate agreement between a local quadratic model and
        the true model in the last step. If None, the termination by this
        condition is disabled.
    xtol : float or None, optional
        Tolerance for termination by the change of the independent variables.
        Default is 1e-8. The exact condition depends on the `method` used:

            * For 'trf' and 'dogbox' : ``norm(dx) < xtol * (xtol + norm(x))``.
            * For 'lm' : ``Delta < xtol * norm(xs)``, where ``Delta`` is
              a trust-region radius and ``xs`` is the value of ``x``
              scaled according to the `x_scale` parameter (see below).

        If None, the termination by this condition is disabled.
    gtol : float or None, optional
        Tolerance for termination by the norm of the gradient. Default is 1e-8.
        The exact condition depends on the `method` used:

            * For 'trf' : ``norm(g_scaled, ord=np.inf) < gtol``, where
              ``g_scaled`` is the value of the gradient scaled to account for
              the presence of the bounds [STIR]_.
            * For 'dogbox' : ``norm(g_free, ord=np.inf) < gtol``, where
              ``g_free`` is the gradient with respect to the variables which
              are not in the optimal state on the boundary.
            * For 'lm' : the maximum absolute value of the cosine of angles
              between columns of the Jacobian and the residual vector is less
              than `gtol`, or the residual vector is zero.

        If None, the termination by this condition is disabled.
    x_scale : array_like or 'jac', optional
        Characteristic scale of each variable. Setting `x_scale` is equivalent
        to reformulating the problem in scaled variables ``xs = x / x_scale``.
        An alternative view is that the size of a trust region along the jth
        dimension is proportional to ``x_scale[j]``. Improved convergence may
        be achieved by setting `x_scale` such that a step of a given size
        along any of the scaled variables has a similar effect on the cost
        function. If set to 'jac', the scale is iteratively updated using the
        inverse norms of the columns of the Jacobian matrix (as described in
        [JJMore]_).
    loss : str or callable, optional
        Determines the loss function. The following keyword values are allowed:

            * 'linear' (default) : ``rho(z) = z``. Gives a standard
              least-squares problem.
            * 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
              approximation of l1 (absolute value) loss. Usually a good
              choice for robust least squares.
            * 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
              similarly to 'soft_l1'.
            * 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens the
              influence of outliers, but may cause difficulties in the
              optimization process.
            * 'arctan' : ``rho(z) = arctan(z)``. Limits a maximum loss on
              a single residual, has properties similar to 'cauchy'.

        If callable, it must take a 1-D ndarray ``z=f**2`` and return an
        array_like with shape (3, m) where row 0 contains function values,
        row 1 contains first derivatives and row 2 contains second
        derivatives (a minimal sketch of such a callable is given at the end
        of the Examples section). Method 'lm' supports only 'linear' loss.
    f_scale : float, optional
        Value of soft margin between inlier and outlier residuals, default
        is 1.0. The loss function is evaluated as follows
        ``rho_(f**2) = C**2 * rho(f**2 / C**2)``, where ``C`` is `f_scale`,
        and ``rho`` is determined by the `loss` parameter. This parameter has
        no effect with ``loss='linear'``, but for other `loss` values it is
        of crucial importance.
    max_nfev : None or int, optional
        Maximum number of function evaluations before the termination.
        If None (default), the value is chosen automatically:

            * For 'trf' and 'dogbox' : 100 * n.
            * For 'lm' : 100 * n if `jac` is callable and 100 * n * (n + 1)
              otherwise (because 'lm' counts function calls in Jacobian
              estimation).

    diff_step : None or array_like, optional
        Determines the relative step size for the finite difference
        approximation of the Jacobian. The actual step is computed as
        ``x * diff_step``. If None (default), then `diff_step` is taken to be
        a conventional "optimal" power of machine epsilon for the finite
        difference scheme used [NR]_.
    tr_solver : {None, 'exact', 'lsmr'}, optional
        Method for solving trust-region subproblems, relevant only for 'trf'
        and 'dogbox' methods.

            * 'exact' is suitable for not very large problems with dense
              Jacobian matrices. The computational complexity per iteration is
              comparable to a singular value decomposition of the Jacobian
              matrix.
            * 'lsmr' is suitable for problems with sparse and large Jacobian
              matrices. It uses the iterative procedure
              `scipy.sparse.linalg.lsmr` for finding a solution of a linear
              least-squares problem and only requires matrix-vector product
              evaluations.

        If None (default), the solver is chosen based on the type of Jacobian
        returned on the first iteration.
    tr_options : dict, optional
        Keyword options passed to the trust-region solver.

            * ``tr_solver='exact'``: `tr_options` are ignored.
            * ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`.
              Additionally, ``method='trf'`` supports the 'regularize' option
              (bool, default is True), which adds a regularization term to the
              normal equation, which improves convergence if the Jacobian is
              rank-deficient [Byrd]_ (eq. 3.4).

    jac_sparsity : {None, array_like, sparse matrix}, optional
        Defines the sparsity structure of the Jacobian matrix for finite
        difference estimation; its shape must be (m, n). If the Jacobian has
        only a few non-zero elements in *each* row, providing the sparsity
        structure will greatly speed up the computations [Curtis]_. A zero
        entry means that a corresponding element in the Jacobian is identically
        zero. If provided, forces the use of the 'lsmr' trust-region solver.
        If None (default), then dense differencing will be used. Has no effect
        for the 'lm' method.
    verbose : {0, 1, 2}, optional
        Level of the algorithm's verbosity:

            * 0 (default) : work silently.
            * 1 : display a termination report.
            * 2 : display progress during iterations (not supported by 'lm'
              method).

    args, kwargs : tuple and dict, optional
        Additional arguments passed to `fun` and `jac`. Both empty by default.
        The calling signature is ``fun(x, *args, **kwargs)`` and the same for
        `jac`.

    Returns
    -------

    `OptimizeResult` with the following fields defined:
    x : ndarray, shape (n,)
        Solution found.
    cost : float
        Value of the cost function at the solution.
    fun : ndarray, shape (m,)
        Vector of residuals at the solution.
    jac : ndarray, sparse matrix or LinearOperator, shape (m, n)
        Modified Jacobian matrix at the solution, in the sense that J^T J
        is a Gauss-Newton approximation of the Hessian of the cost function.
        The type is the same as the one used by the algorithm.
    grad : ndarray, shape (n,)
        Gradient of the cost function at the solution.
    optimality : float
        First-order optimality measure. In unconstrained problems, it is always
        the uniform norm of the gradient. In constrained problems, it is the
        quantity which was compared with `gtol` during iterations.
    active_mask : ndarray of int, shape (n,)
        Each component shows whether a corresponding constraint is active
        (that is, whether a variable is at the bound):

            * 0 : a constraint is not active.
            * -1 : a lower bound is active.
            * 1 : an upper bound is active.

        Might be somewhat arbitrary for the 'trf' method as it generates a
        sequence of strictly feasible iterates and `active_mask` is determined
        within a tolerance threshold.
    nfev : int
        Number of function evaluations done. Methods 'trf' and 'dogbox' do not
        count function calls for numerical Jacobian approximation, as opposed
        to the 'lm' method.
    njev : int or None
        Number of Jacobian evaluations done. If numerical Jacobian
        approximation is used in the 'lm' method, it is set to None.
    status : int
        The reason for algorithm termination:

            * -1 : improper input parameters status returned from MINPACK.
            * 0 : the maximum number of function evaluations is exceeded.
            * 1 : `gtol` termination condition is satisfied.
            * 2 : `ftol` termination condition is satisfied.
            * 3 : `xtol` termination condition is satisfied.
            * 4 : both `ftol` and `xtol` termination conditions are satisfied.

    message : str
        Verbal description of the termination reason.
    success : bool
        True if one of the convergence criteria is satisfied (`status` > 0).

    See Also
    --------
    leastsq : A legacy wrapper for the MINPACK implementation of the
        Levenberg-Marquardt algorithm.
    curve_fit : Least-squares minimization applied to a curve-fitting problem.

    Notes
    -----

    Method 'lm' (Levenberg-Marquardt) calls a wrapper over least-squares
    algorithms implemented in MINPACK (lmder, lmdif). It runs the
    Levenberg-Marquardt algorithm formulated as a trust-region type algorithm.
    The implementation is based on the paper [JJMore]_; it is very robust and
    efficient, with a lot of smart tricks. It should be your first choice
    for unconstrained problems. Note that it doesn't support bounds. Also,
    it doesn't work when m < n.

    Method 'trf' (Trust Region Reflective) is motivated by the process of
    solving a system of equations, which constitute the first-order optimality
    condition for a bound-constrained minimization problem as formulated in
    [STIR]_. The algorithm iteratively solves trust-region subproblems
    augmented by a special diagonal quadratic term and with trust-region shape
    determined by the distance from the bounds and the direction of the
    gradient. These enhancements help to avoid making steps directly into
    bounds and efficiently explore the whole space of variables. To further
    improve convergence, the algorithm considers search directions reflected
    from the bounds. To obey theoretical requirements, the algorithm keeps
    iterates strictly feasible. With dense Jacobians trust-region subproblems
    are solved by an exact method very similar to the one described in
    [JJMore]_ (and implemented in MINPACK). The difference from the MINPACK
    implementation is that a singular value decomposition of a Jacobian
    matrix is done once per iteration, instead of a QR decomposition and series
    of Givens rotation eliminations. For large sparse Jacobians a 2-D subspace
    approach of solving trust-region subproblems is used [STIR]_, [Byrd]_.
    The subspace is spanned by a scaled gradient and an approximate
    Gauss-Newton solution delivered by `scipy.sparse.linalg.lsmr`. When no
    constraints are imposed the algorithm is very similar to MINPACK and has
    generally comparable performance. The algorithm works quite robustly in
    unbounded and bounded problems, and is therefore chosen as the default
    algorithm.

    Method 'dogbox' operates in a trust-region framework, but considers
    rectangular trust regions as opposed to conventional ellipsoids [Voglis]_.
    The intersection of a current trust region and initial bounds is again
    rectangular, so on each iteration a quadratic minimization problem subject
    to bound constraints is solved approximately by Powell's dogleg method
    [NumOpt]_. The required Gauss-Newton step can be computed exactly for
    dense Jacobians or approximately by `scipy.sparse.linalg.lsmr` for large
    sparse Jacobians. The algorithm is likely to exhibit slow convergence when
    the rank of the Jacobian is less than the number of variables. The
    algorithm often outperforms 'trf' in bounded problems with a small number
    of variables.

    Robust loss functions are implemented as described in [BA]_. The idea
    is to modify a residual vector and a Jacobian matrix on each iteration
    such that the computed gradient and Gauss-Newton Hessian approximation
    match the true gradient and Hessian approximation of the cost function.
    Then the algorithm proceeds in a normal way, i.e., robust loss functions
    are implemented as a simple wrapper over standard least-squares algorithms.

    .. versionadded:: 0.17.0

    References
    ----------
    .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
              and Conjugate Gradient Method for Large-Scale Bound-Constrained
              Minimization Problems," SIAM Journal on Scientific Computing,
              Vol. 21, Number 1, pp 1-23, 1999.
    .. [NR] William H. Press et al., "Numerical Recipes. The Art of Scientific
            Computing. 3rd edition", Sec. 5.7.
    .. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate
              solution of the trust region problem by minimization over
              two-dimensional subspaces", Math. Programming, 40, pp. 247-263,
              1988.
    .. [Curtis] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
                sparse Jacobian matrices", Journal of the Institute of
                Mathematics and its Applications, 13, pp. 117-120, 1974.
    .. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation
                and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
                Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    .. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region
                Dogleg Approach for Unconstrained and Bound Constrained
                Nonlinear Optimization", WSEAS International Conference on
                Applied Mathematics, Corfu, Greece, 2004.
    .. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization,
                2nd edition", Chapter 4.
    .. [BA] B. Triggs et al., "Bundle Adjustment - A Modern Synthesis",
            Proceedings of the International Workshop on Vision Algorithms:
            Theory and Practice, pp. 298-372, 1999.

    Examples
    --------

    In this example we find a minimum of the Rosenbrock function without bounds
    on independent variables.

    >>> def fun_rosenbrock(x):
    ...     return np.array([10 * (x[1] - x[0]**2), (1 - x[0])])

    Notice that we only provide the vector of the residuals. The algorithm
    constructs the cost function as a sum of squares of the residuals, which
    gives the Rosenbrock function. The exact minimum is at ``x = [1.0, 1.0]``.

    >>> from scipy.optimize import least_squares
    >>> x0_rosenbrock = np.array([2, 2])
    >>> res_1 = least_squares(fun_rosenbrock, x0_rosenbrock)
    >>> res_1.x
    array([ 1.,  1.])
    >>> res_1.cost
    9.8669242910846867e-30
    >>> res_1.optimality
    8.8928864934219529e-14
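
    The finite-difference scheme used to estimate the Jacobian could also be
    selected explicitly. As a minimal sketch (``res_1b`` is only an
    illustrative name), the '3-point' scheme trades roughly twice as many
    residual evaluations per Jacobian estimate for better accuracy:

    >>> res_1b = least_squares(fun_rosenbrock, x0_rosenbrock, jac='3-point')

    The solution reported in ``res_1b`` should essentially coincide with
    ``res_1.x`` above.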


    We now constrain the variables, in such a way that the previous solution
    becomes infeasible. Specifically, we require that ``x[1] >= 1.5``, and
    ``x[0]`` be left unconstrained. To this end, we specify the `bounds`
    parameter to `least_squares` in the form
    ``bounds=([-np.inf, 1.5], np.inf)``.

    We also provide the analytic Jacobian:

    >>> def jac_rosenbrock(x):
    ...     return np.array([
    ...         [-20 * x[0], 10],
    ...         [-1, 0]])

    Putting this all together, we see that the new solution lies on the bound:

    >>> res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock,
    ...                       bounds=([-np.inf, 1.5], np.inf))
    >>> res_2.x
    array([ 1.22437075,  1.5       ])
    >>> res_2.cost
    0.025213093946805685
    >>> res_2.optimality
    1.5885401433157753e-07

    Now we solve a system of equations (i.e., the cost function should be zero
    at a minimum) for a Broyden tridiagonal vector-valued function of 100000
    variables:

    >>> def fun_broyden(x):
    ...     f = (3 - x) * x + 1
    ...     f[1:] -= x[:-1]
    ...     f[:-1] -= 2 * x[1:]
    ...     return f

    The corresponding Jacobian matrix is sparse. We tell the algorithm to
    estimate it by finite differences and provide the sparsity structure of
    the Jacobian to significantly speed up this process.

    >>> from scipy.sparse import lil_matrix
    >>> def sparsity_broyden(n):
    ...     sparsity = lil_matrix((n, n), dtype=int)
    ...     i = np.arange(n)
    ...     sparsity[i, i] = 1
    ...     i = np.arange(1, n)
    ...     sparsity[i, i - 1] = 1
    ...     i = np.arange(n - 1)
    ...     sparsity[i, i + 1] = 1
    ...     return sparsity
    ...
    >>> n = 100000
    >>> x0_broyden = -np.ones(n)
    ...
    >>> res_3 = least_squares(fun_broyden, x0_broyden,
    ...                       jac_sparsity=sparsity_broyden(n))
    >>> res_3.cost
    4.5687069299604613e-23
    >>> res_3.optimality
    1.1650454296851518e-11
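
    The trust-region solver and its options could also be set explicitly
    instead of being chosen automatically. As a minimal sketch (``res_3b`` is
    only an illustrative name), the call below simply spells out the choices
    made above for this sparse problem: providing `jac_sparsity` implies the
    'lsmr' solver, and 'regularize' defaults to True for ``method='trf'``.

    >>> res_3b = least_squares(fun_broyden, x0_broyden,
    ...                        jac_sparsity=sparsity_broyden(n),
    ...                        tr_solver='lsmr',
    ...                        tr_options={'regularize': True})

    The result should essentially coincide with ``res_3`` above.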


    Let's also solve a curve fitting problem using a robust loss function to
    take care of outliers in the data. Define the model function as
    ``y = a + b * exp(c * t)``, where t is a predictor variable, y is an
    observation and a, b, c are parameters to estimate.

    First, define the function which generates the data with noise and
    outliers, define the model parameters, and generate data:

    >>> def gen_data(t, a, b, c, noise=0, n_outliers=0, random_state=0):
    ...     y = a + b * np.exp(t * c)
    ...
    ...     rnd = np.random.RandomState(random_state)
    ...     error = noise * rnd.randn(t.size)
    ...     outliers = rnd.randint(0, t.size, n_outliers)
    ...     error[outliers] *= 10
    ...
    ...     return y + error
    ...
    >>> a = 0.5
    >>> b = 2.0
    >>> c = -1
    >>> t_min = 0
    >>> t_max = 10
    >>> n_points = 15
    ...
    >>> t_train = np.linspace(t_min, t_max, n_points)
    >>> y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

    Define the function for computing residuals and the initial estimate of
    the parameters.

    >>> def fun(x, t, y):
    ...     return x[0] + x[1] * np.exp(x[2] * t) - y
    ...
    >>> x0 = np.array([1.0, 1.0, 0.0])

    Compute a standard least-squares solution:

    >>> res_lsq = least_squares(fun, x0, args=(t_train, y_train))

    Now compute two solutions with two different robust loss functions. The
    parameter `f_scale` is set to 0.1, meaning that inlier residuals should
    not significantly exceed 0.1 (the noise level used).

    >>> res_soft_l1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1,
    ...                             args=(t_train, y_train))
    >>> res_log = least_squares(fun, x0, loss='cauchy', f_scale=0.1,
    ...                         args=(t_train, y_train))

    And, finally, plot all the curves. We see that by selecting an appropriate
    `loss` we can get estimates close to optimal even in the presence of
    strong outliers. But keep in mind that generally it is recommended to try
    'soft_l1' or 'huber' losses first (if at all necessary) as the other two
    options may cause difficulties in the optimization process.

    >>> t_test = np.linspace(t_min, t_max, n_points * 10)
    >>> y_true = gen_data(t_test, a, b, c)
    >>> y_lsq = gen_data(t_test, *res_lsq.x)
    >>> y_soft_l1 = gen_data(t_test, *res_soft_l1.x)
    >>> y_log = gen_data(t_test, *res_log.x)
    ...
    >>> import matplotlib.pyplot as plt
    >>> plt.plot(t_train, y_train, 'o')
    >>> plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
    >>> plt.plot(t_test, y_lsq, label='linear loss')
    >>> plt.plot(t_test, y_soft_l1, label='soft_l1 loss')
    >>> plt.plot(t_test, y_log, label='cauchy loss')
    >>> plt.xlabel("t")
    >>> plt.ylabel("y")
    >>> plt.legend()
    >>> plt.show()

    In the next example, we show how complex-valued residual functions of
    complex variables can be optimized with ``least_squares()``. Consider the
    following function:

    >>> def f(z):
    ...     return z - (0.5 + 0.5j)

    We wrap it into a function of real variables that returns real residuals
    by simply handling the real and imaginary parts as independent variables:

    >>> def f_wrap(x):
    ...     fx = f(x[0] + 1j*x[1])
    ...     return np.array([fx.real, fx.imag])

    Thus, instead of the original m-D complex function of n complex
    variables, we optimize a 2m-D real function of 2n real variables:

    >>> from scipy.optimize import least_squares
    >>> res_wrapped = least_squares(f_wrap, (0.1, 0.1), bounds=([0, 0], [1, 1]))
    >>> z = res_wrapped.x[0] + res_wrapped.x[1]*1j
    >>> z
    (0.49999999999925893+0.49999999999925893j)
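
    Finally, `loss` can also be given as a callable that returns the values
    and first two derivatives of ``rho`` as an array of shape (3, m). As a
    minimal sketch, the following callable reproduces the built-in 'cauchy'
    loss described above (``loss_cauchy`` and ``res_robust`` are only
    illustrative names):

    >>> def loss_cauchy(z):
    ...     rho = np.empty((3,) + z.shape)
    ...     rho[0] = np.log1p(z)
    ...     rho[1] = 1 / (1 + z)
    ...     rho[2] = -1 / (1 + z)**2
    ...     return rho
    ...
    >>> res_robust = least_squares(fun, x0, loss=loss_cauchy, f_scale=0.1,
    ...                            args=(t_train, y_train))

    The estimate in ``res_robust`` should essentially coincide with
    ``res_log`` obtained with ``loss='cauchy'`` above.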


    """
    if method not in ['trf', 'dogbox', 'lm']:
        raise ValueError("`method` must be 'trf', 'dogbox' or 'lm'.")

    if jac not in ['2-point', '3-point', 'cs'] and not callable(jac):
        raise ValueError("`jac` must be '2-point', '3-point', 'cs' or "
                         "callable.")

    if tr_solver not in [None, 'exact', 'lsmr']:
        raise ValueError("`tr_solver` must be None, 'exact' or 'lsmr'.")

    if loss not in IMPLEMENTED_LOSSES and not callable(loss):
        raise ValueError("`loss` must be one of {0} or a callable."
                         .format(IMPLEMENTED_LOSSES.keys()))

    if method == 'lm' and loss != 'linear':
        raise ValueError("method='lm' supports only 'linear' loss function.")

    if verbose not in [0, 1, 2]:
        raise ValueError("`verbose` must be in [0, 1, 2].")

    if len(bounds) != 2:
        raise ValueError("`bounds` must contain 2 elements.")

    if max_nfev is not None and max_nfev <= 0:
        raise ValueError("`max_nfev` must be None or positive integer.")

    if np.iscomplexobj(x0):
        raise ValueError("`x0` must be real.")

    x0 = np.atleast_1d(x0).astype(float)

    if x0.ndim > 1:
        raise ValueError("`x0` must have at most 1 dimension.")

    lb, ub = prepare_bounds(bounds, x0.shape[0])

    if method == 'lm' and not np.all((lb == -np.inf) & (ub == np.inf)):
        raise ValueError("Method 'lm' doesn't support bounds.")

    if lb.shape != x0.shape or ub.shape != x0.shape:
        raise ValueError("Inconsistent shapes between bounds and `x0`.")

    if np.any(lb >= ub):
        raise ValueError("Each lower bound must be strictly less than each "
                         "upper bound.")

    if not in_bounds(x0, lb, ub):
        raise ValueError("`x0` is infeasible.")

    x_scale = check_x_scale(x_scale, x0)

    ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method)

    def fun_wrapped(x):
        return np.atleast_1d(fun(x, *args, **kwargs))

    if method == 'trf':
        x0 = make_strictly_feasible(x0, lb, ub)

    f0 = fun_wrapped(x0)

    if f0.ndim != 1:
        raise ValueError("`fun` must return at most 1-d array_like. "
                         "f0.shape: {0}".format(f0.shape))

    if not np.all(np.isfinite(f0)):
        raise ValueError("Residuals are not finite in the initial point.")

    n = x0.size
    m = f0.size

    if method == 'lm' and m < n:
        raise ValueError("Method 'lm' doesn't work when the number of "
                         "residuals is less than the number of variables.")

    loss_function = construct_loss_function(m, loss, f_scale)
    if callable(loss):
        rho = loss_function(f0)
        if rho.shape != (3, m):
            raise ValueError("The return value of `loss` callable has wrong "
                             "shape.")
        initial_cost = 0.5 * np.sum(rho[0])
    elif loss_function is not None:
        initial_cost = loss_function(f0, cost_only=True)
    else:
        initial_cost = 0.5 * np.dot(f0, f0)

    if callable(jac):
        J0 = jac(x0, *args, **kwargs)

        if issparse(J0):
            J0 = csr_matrix(J0)

            def jac_wrapped(x, _=None):
                return csr_matrix(jac(x, *args, **kwargs))

        elif isinstance(J0, LinearOperator):
            def jac_wrapped(x, _=None):
                return jac(x, *args, **kwargs)

        else:
            J0 = np.atleast_2d(J0)

            def jac_wrapped(x, _=None):
                return np.atleast_2d(jac(x, *args, **kwargs))

    else:  # Estimate Jacobian by finite differences.
        if method == 'lm':
            if jac_sparsity is not None:
                raise ValueError("method='lm' does not support "
                                 "`jac_sparsity`.")

            if jac != '2-point':
                warn("jac='{0}' works equivalently to '2-point' "
                     "for method='lm'.".format(jac))

            J0 = jac_wrapped = None
        else:
            if jac_sparsity is not None and tr_solver == 'exact':
                raise ValueError("tr_solver='exact' is incompatible "
                                 "with `jac_sparsity`.")

            jac_sparsity = check_jac_sparsity(jac_sparsity, m, n)

            def jac_wrapped(x, f):
                J = approx_derivative(fun, x, rel_step=diff_step, method=jac,
                                      f0=f, bounds=bounds, args=args,
                                      kwargs=kwargs, sparsity=jac_sparsity)
                if J.ndim != 2:  # J is guaranteed not sparse.
                    J = np.atleast_2d(J)

                return J

            J0 = jac_wrapped(x0, f0)

    if J0 is not None:
        if J0.shape != (m, n):
            raise ValueError(
                "The return value of `jac` has wrong shape: expected {0}, "
                "actual {1}.".format((m, n), J0.shape))

        if not isinstance(J0, np.ndarray):
            if method == 'lm':
                raise ValueError("method='lm' works only with dense "
                                 "Jacobian matrices.")

            if tr_solver == 'exact':
                raise ValueError(
                    "tr_solver='exact' works only with dense "
                    "Jacobian matrices.")

        jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
        if isinstance(J0, LinearOperator) and jac_scale:
            raise ValueError("x_scale='jac' can't be used when `jac` "
                             "returns LinearOperator.")

        if tr_solver is None:
            if isinstance(J0, np.ndarray):
                tr_solver = 'exact'
            else:
                tr_solver = 'lsmr'

    if method == 'lm':
        result = call_minpack(fun_wrapped, x0, jac_wrapped, ftol, xtol, gtol,
                              max_nfev, x_scale, diff_step)

    elif method == 'trf':
        result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol,
                     gtol, max_nfev, x_scale, loss_function, tr_solver,
                     tr_options.copy(), verbose)

    elif method == 'dogbox':
        if tr_solver == 'lsmr' and 'regularize' in tr_options:
            warn("The keyword 'regularize' in `tr_options` is not relevant "
                 "for 'dogbox' method.")
            tr_options = tr_options.copy()
            del tr_options['regularize']

        result = dogbox(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol,
                        xtol, gtol, max_nfev, x_scale, loss_function,
                        tr_solver, tr_options, verbose)

    result.message = TERMINATION_MESSAGES[result.status]
    result.success = result.status > 0

    if verbose >= 1:
        print(result.message)
        print("Function evaluations {0}, initial cost {1:.4e}, final cost "
              "{2:.4e}, first-order optimality {3:.2e}."
              .format(result.nfev, initial_cost, result.cost,
                      result.optimality))

    return result