"""Generic interface for least-squares minimization."""
from warnings import warn

import numpy as np
from numpy.linalg import norm

from scipy.sparse import issparse, csr_matrix
from scipy.sparse.linalg import LinearOperator
from scipy.optimize import _minpack, OptimizeResult
from scipy.optimize._numdiff import approx_derivative, group_columns

from .trf import trf
from .dogbox import dogbox
from .common import EPS, in_bounds, make_strictly_feasible


TERMINATION_MESSAGES = {
    -1: "Improper input parameters status returned from `leastsq`",
    0: "The maximum number of function evaluations is exceeded.",
    1: "`gtol` termination condition is satisfied.",
    2: "`ftol` termination condition is satisfied.",
    3: "`xtol` termination condition is satisfied.",
    4: "Both `ftol` and `xtol` termination conditions are satisfied."
}


FROM_MINPACK_TO_COMMON = {
    0: -1,  # Improper input parameters from MINPACK.
    1: 2,
    2: 3,
    3: 4,
    4: 1,
    5: 0
    # There are 6, 7, 8 for too small tolerance parameters,
    # but we guard against it by checking ftol, xtol, gtol beforehand.
}


def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step):
    n = x0.size

    if diff_step is None:
        epsfcn = EPS
    else:
        epsfcn = diff_step**2

    # Compute MINPACK's `diag`, which is inverse of our `x_scale` and
    # ``x_scale='jac'`` corresponds to ``diag=None``.
    if isinstance(x_scale, str) and x_scale == 'jac':
        diag = None
    else:
        diag = 1 / x_scale

    full_output = True
    col_deriv = False
    factor = 100.0

    if jac is None:
        if max_nfev is None:
            # n squared to account for Jacobian evaluations.
            max_nfev = 100 * n * (n + 1)
        x, info, status = _minpack._lmdif(
            fun, x0, (), full_output, ftol, xtol, gtol,
            max_nfev, epsfcn, factor, diag)
    else:
        if max_nfev is None:
            max_nfev = 100 * n
        x, info, status = _minpack._lmder(
            fun, jac, x0, (), full_output, col_deriv,
            ftol, xtol, gtol, max_nfev, factor, diag)

    f = info['fvec']

    if callable(jac):
        J = jac(x)
    else:
        J = np.atleast_2d(approx_derivative(fun, x))

    cost = 0.5 * np.dot(f, f)
    g = J.T.dot(f)
    g_norm = norm(g, ord=np.inf)

    nfev = info['nfev']
    njev = info.get('njev', None)

    status = FROM_MINPACK_TO_COMMON[status]
    active_mask = np.zeros_like(x0, dtype=int)

    return OptimizeResult(
        x=x, cost=cost, fun=f, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev, status=status)


def prepare_bounds(bounds, n):
    lb, ub = [np.asarray(b, dtype=float) for b in bounds]
    if lb.ndim == 0:
        lb = np.resize(lb, n)

    if ub.ndim == 0:
        ub = np.resize(ub, n)

    return lb, ub


def check_tolerance(ftol, xtol, gtol, method):
    def check(tol, name):
        if tol is None:
            tol = 0
        elif tol < EPS:
            warn("Setting `{}` below the machine epsilon ({:.2e}) effectively "
                 "disables the corresponding termination condition."
                 .format(name, EPS))
        return tol

    ftol = check(ftol, "ftol")
    xtol = check(xtol, "xtol")
    gtol = check(gtol, "gtol")

    if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS):
        raise ValueError("All tolerances must be higher than machine epsilon "
                         "({:.2e}) for method 'lm'.".format(EPS))
    elif ftol < EPS and xtol < EPS and gtol < EPS:
        raise ValueError("At least one of the tolerances must be higher than "
                         "machine epsilon ({:.2e}).".format(EPS))

    return ftol, xtol, gtol


def check_x_scale(x_scale, x0):
    if isinstance(x_scale, str) and x_scale == 'jac':
        return x_scale

    try:
        x_scale = np.asarray(x_scale, dtype=float)
        valid = np.all(np.isfinite(x_scale)) and np.all(x_scale > 0)
    except (ValueError, TypeError):
        valid = False

    if not valid:
        raise ValueError("`x_scale` must be 'jac' or array_like with "
                         "positive numbers.")

    if x_scale.ndim == 0:
        x_scale = np.resize(x_scale, x0.shape)

    if x_scale.shape != x0.shape:
        raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")

    return x_scale


def check_jac_sparsity(jac_sparsity, m, n):
    if jac_sparsity is None:
        return None

    if not issparse(jac_sparsity):
        jac_sparsity = np.atleast_2d(jac_sparsity)

    if jac_sparsity.shape != (m, n):
        raise ValueError("`jac_sparsity` has wrong shape.")

    return jac_sparsity, group_columns(jac_sparsity)
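

# Illustrative sketch (not part of the SciPy API): the tuple returned above
# pairs the sparsity pattern with the column grouping produced by
# `group_columns`, which is what `approx_derivative` consumes for sparse
# finite differencing. The helper below is hypothetical and never called; it
# only shows the pairing for a small, assumed tridiagonal pattern.
def _sparsity_grouping_sketch():
    n = 5
    # Tridiagonal sparsity structure: nonzeros on the main diagonal and its
    # two neighbours.
    sparsity = (np.eye(n, dtype=int)
                + np.eye(n, k=1, dtype=int)
                + np.eye(n, k=-1, dtype=int))
    # Columns sharing a group index are structurally orthogonal and can be
    # perturbed together when estimating the Jacobian.
    groups = group_columns(sparsity)
    return sparsity, groups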


# Loss functions.


def huber(z, rho, cost_only):
    mask = z <= 1
    rho[0, mask] = z[mask]
    rho[0, ~mask] = 2 * z[~mask]**0.5 - 1
    if cost_only:
        return
    rho[1, mask] = 1
    rho[1, ~mask] = z[~mask]**-0.5
    rho[2, mask] = 0
    rho[2, ~mask] = -0.5 * z[~mask]**-1.5


def soft_l1(z, rho, cost_only):
    t = 1 + z
    rho[0] = 2 * (t**0.5 - 1)
    if cost_only:
        return
    rho[1] = t**-0.5
    rho[2] = -0.5 * t**-1.5


def cauchy(z, rho, cost_only):
    rho[0] = np.log1p(z)
    if cost_only:
        return
    t = 1 + z
    rho[1] = 1 / t
    rho[2] = -1 / t**2


def arctan(z, rho, cost_only):
    rho[0] = np.arctan(z)
    if cost_only:
        return
    t = 1 + z**2
    rho[1] = 1 / t
    rho[2] = -2 * z / t**2


IMPLEMENTED_LOSSES = dict(linear=None, huber=huber, soft_l1=soft_l1,
                          cauchy=cauchy, arctan=arctan)
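

# Illustrative sketch (not part of the SciPy API): a user-supplied `loss`
# callable must accept ``z = f**2`` (shape (m,)) and return an array of shape
# (3, m) whose rows hold rho(z), rho'(z) and rho''(z), mirroring the built-in
# losses above. The function below re-derives 'soft_l1' in that callable
# form purely as an example; the name is hypothetical and it is never called.
def _soft_l1_as_callable(z):
    t = 1 + z
    rho = np.empty((3,) + z.shape)
    rho[0] = 2 * (t**0.5 - 1)   # rho(z)
    rho[1] = t**-0.5            # rho'(z)
    rho[2] = -0.5 * t**-1.5     # rho''(z)
    return rho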


def construct_loss_function(m, loss, f_scale):
    if loss == 'linear':
        return None

    if not callable(loss):
        loss = IMPLEMENTED_LOSSES[loss]
        rho = np.empty((3, m))

        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            loss(z, rho, cost_only=cost_only)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho
    else:
        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            rho = loss(z)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho

    return loss_function
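

# Illustrative sketch (not part of the SciPy API): how the wrapper returned by
# `construct_loss_function` is consumed. With ``cost_only=True`` it returns
# the scalar robust cost; otherwise it returns the scaled ``rho`` array that
# the solvers use to modify the residuals and Jacobian. The residual values
# below are arbitrary example inputs; the helper is hypothetical and unused.
def _loss_function_usage_sketch():
    f = np.array([0.5, -2.0, 3.0])                   # example residuals
    lf = construct_loss_function(f.size, 'soft_l1', f_scale=1.0)
    cost = lf(f, cost_only=True)                     # scalar robust cost
    rho = lf(f)                                      # array of shape (3, m)
    return cost, rho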


def least_squares(
        fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf',
        ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear',
        f_scale=1.0, diff_step=None, tr_solver=None, tr_options={},
        jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={}):
    """Solve a nonlinear least-squares problem with bounds on the variables.

    Given the residuals f(x) (an m-D real function of n real
    variables) and the loss function rho(s) (a scalar function), `least_squares`
    finds a local minimum of the cost function F(x)::

        minimize F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1)
        subject to lb <= x <= ub

    The purpose of the loss function rho(s) is to reduce the influence of
    outliers on the solution.

    Parameters
    ----------
    fun : callable
        Function which computes the vector of residuals, with the signature
        ``fun(x, *args, **kwargs)``, i.e., the minimization proceeds with
        respect to its first argument. The argument ``x`` passed to this
        function is an ndarray of shape (n,) (never a scalar, even for n=1).
        It must allocate and return a 1-D array_like of shape (m,) or a scalar.
        If the argument ``x`` is complex or the function ``fun`` returns
        complex residuals, it must be wrapped in a real function of real
        arguments, as shown at the end of the Examples section.
    x0 : array_like with shape (n,) or float
        Initial guess on independent variables. If float, it will be treated
        as a 1-D array with one element.
    jac : {'2-point', '3-point', 'cs', callable}, optional
        Method of computing the Jacobian matrix (an m-by-n matrix, where
        element (i, j) is the partial derivative of f[i] with respect to
        x[j]). The keywords select a finite difference scheme for numerical
        estimation. The scheme '3-point' is more accurate, but requires
        twice as many operations as '2-point' (default). The scheme 'cs'
        uses complex steps, and while potentially the most accurate, it is
        applicable only when `fun` correctly handles complex inputs and
        can be analytically continued to the complex plane. Method 'lm'
        always uses the '2-point' scheme. If callable, it is used as
        ``jac(x, *args, **kwargs)`` and should return a good approximation
        (or the exact value) for the Jacobian as an array_like (np.atleast_2d
        is applied), a sparse matrix or a `scipy.sparse.linalg.LinearOperator`.
    bounds : 2-tuple of array_like, optional
        Lower and upper bounds on independent variables. Defaults to no bounds.
        Each array must match the size of `x0` or be a scalar, in the latter
        case a bound will be the same for all variables. Use ``np.inf`` with
        an appropriate sign to disable bounds on all or some variables.
    method : {'trf', 'dogbox', 'lm'}, optional
        Algorithm to perform minimization.

            * 'trf' : Trust Region Reflective algorithm, particularly suitable
              for large sparse problems with bounds. Generally robust method.
            * 'dogbox' : dogleg algorithm with rectangular trust regions,
              typical use case is small problems with bounds. Not recommended
              for problems with rank-deficient Jacobian.
            * 'lm' : Levenberg-Marquardt algorithm as implemented in MINPACK.
              Doesn't handle bounds and sparse Jacobians. Usually the most
              efficient method for small unconstrained problems.

        Default is 'trf'. See Notes for more information.
    ftol : float or None, optional
        Tolerance for termination by the change of the cost function. Default
        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
        and there was an adequate agreement between a local quadratic model and
        the true model in the last step. If None, the termination by this
        condition is disabled.
    xtol : float or None, optional
        Tolerance for termination by the change of the independent variables.
        Default is 1e-8. The exact condition depends on the `method` used:

            * For 'trf' and 'dogbox' : ``norm(dx) < xtol * (xtol + norm(x))``.
            * For 'lm' : ``Delta < xtol * norm(xs)``, where ``Delta`` is
              a trust-region radius and ``xs`` is the value of ``x``
              scaled according to `x_scale` parameter (see below).

        If None, the termination by this condition is disabled.
    gtol : float or None, optional
        Tolerance for termination by the norm of the gradient. Default is 1e-8.
        The exact condition depends on the `method` used:

            * For 'trf' : ``norm(g_scaled, ord=np.inf) < gtol``, where
              ``g_scaled`` is the value of the gradient scaled to account for
              the presence of the bounds [STIR]_.
            * For 'dogbox' : ``norm(g_free, ord=np.inf) < gtol``, where
              ``g_free`` is the gradient with respect to the variables which
              are not in the optimal state on the boundary.
            * For 'lm' : the maximum absolute value of the cosine of angles
              between columns of the Jacobian and the residual vector is less
              than `gtol`, or the residual vector is zero.

        If None, the termination by this condition is disabled.
    x_scale : array_like or 'jac', optional
        Characteristic scale of each variable. Setting `x_scale` is equivalent
        to reformulating the problem in scaled variables ``xs = x / x_scale``.
        An alternative view is that the size of a trust region along the jth
        dimension is proportional to ``x_scale[j]``. Improved convergence may
        be achieved by setting `x_scale` such that a step of a given size
        along any of the scaled variables has a similar effect on the cost
        function. If set to 'jac', the scale is iteratively updated using the
        inverse norms of the columns of the Jacobian matrix (as described in
        [JJMore]_).
    loss : str or callable, optional
        Determines the loss function. The following keyword values are allowed:

            * 'linear' (default) : ``rho(z) = z``. Gives a standard
              least-squares problem.
            * 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
              approximation of l1 (absolute value) loss. Usually a good
              choice for robust least squares.
            * 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
              similarly to 'soft_l1'.
            * 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens outliers'
              influence, but may cause difficulties in the optimization
              process.
            * 'arctan' : ``rho(z) = arctan(z)``. Limits a maximum loss on
              a single residual, has properties similar to 'cauchy'.

        If callable, it must take a 1-D ndarray ``z=f**2`` and return an
        array_like with shape (3, m) where row 0 contains function values,
        row 1 contains first derivatives and row 2 contains second
        derivatives. Method 'lm' supports only 'linear' loss.
    f_scale : float, optional
        Value of soft margin between inlier and outlier residuals, default
        is 1.0. The loss function is evaluated as follows
        ``rho_(f**2) = C**2 * rho(f**2 / C**2)``, where ``C`` is `f_scale`,
        and ``rho`` is determined by the `loss` parameter. This parameter has
        no effect with ``loss='linear'``, but for other `loss` values it is
        of crucial importance.
    max_nfev : None or int, optional
        Maximum number of function evaluations before the termination.
        If None (default), the value is chosen automatically:

            * For 'trf' and 'dogbox' : 100 * n.
            * For 'lm' : 100 * n if `jac` is callable and 100 * n * (n + 1)
              otherwise (because 'lm' counts function calls in Jacobian
              estimation).

    diff_step : None or array_like, optional
        Determines the relative step size for the finite difference
        approximation of the Jacobian. The actual step is computed as
        ``x * diff_step``. If None (default), then `diff_step` is taken to be
        a conventional "optimal" power of machine epsilon for the finite
        difference scheme used [NR]_.
    tr_solver : {None, 'exact', 'lsmr'}, optional
        Method for solving trust-region subproblems, relevant only for 'trf'
        and 'dogbox' methods.

            * 'exact' is suitable for not very large problems with dense
              Jacobian matrices. The computational complexity per iteration is
              comparable to a singular value decomposition of the Jacobian
              matrix.
            * 'lsmr' is suitable for problems with sparse and large Jacobian
              matrices. It uses the iterative procedure
              `scipy.sparse.linalg.lsmr` for finding a solution of a linear
              least-squares problem and only requires matrix-vector product
              evaluations.

        If None (default), the solver is chosen based on the type of Jacobian
        returned on the first iteration.
    tr_options : dict, optional
        Keyword options passed to trust-region solver.

            * ``tr_solver='exact'``: `tr_options` are ignored.
            * ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`.
              Additionally, ``method='trf'`` supports 'regularize' option
              (bool, default is True), which adds a regularization term to the
              normal equation, which improves convergence if the Jacobian is
              rank-deficient [Byrd]_ (eq. 3.4).

    jac_sparsity : {None, array_like, sparse matrix}, optional
        Defines the sparsity structure of the Jacobian matrix for finite
        difference estimation, its shape must be (m, n). If the Jacobian has
        only few non-zero elements in *each* row, providing the sparsity
        structure will greatly speed up the computations [Curtis]_. A zero
        entry means that a corresponding element in the Jacobian is identically
        zero. If provided, forces the use of 'lsmr' trust-region solver.
        If None (default), then dense differencing will be used. Has no effect
        for 'lm' method.
    verbose : {0, 1, 2}, optional
        Level of algorithm's verbosity:

            * 0 (default) : work silently.
            * 1 : display a termination report.
            * 2 : display progress during iterations (not supported by 'lm'
              method).

    args, kwargs : tuple and dict, optional
        Additional arguments passed to `fun` and `jac`. Both empty by default.
        The calling signature is ``fun(x, *args, **kwargs)`` and the same for
        `jac`.

    Returns
    -------
    `OptimizeResult` with the following fields defined:
    x : ndarray, shape (n,)
        Solution found.
    cost : float
        Value of the cost function at the solution.
    fun : ndarray, shape (m,)
        Vector of residuals at the solution.
    jac : ndarray, sparse matrix or LinearOperator, shape (m, n)
        Modified Jacobian matrix at the solution, in the sense that J^T J
        is a Gauss-Newton approximation of the Hessian of the cost function.
        The type is the same as the one used by the algorithm.
    grad : ndarray, shape (n,)
        Gradient of the cost function at the solution.
    optimality : float
        First-order optimality measure. In unconstrained problems, it is always
        the uniform norm of the gradient. In constrained problems, it is the
        quantity which was compared with `gtol` during iterations.
    active_mask : ndarray of int, shape (n,)
        Each component shows whether a corresponding constraint is active
        (that is, whether a variable is at the bound):

            * 0 : a constraint is not active.
            * -1 : a lower bound is active.
            * 1 : an upper bound is active.

        Might be somewhat arbitrary for 'trf' method as it generates a sequence
        of strictly feasible iterates and `active_mask` is determined within a
        tolerance threshold.
    nfev : int
        Number of function evaluations done. Methods 'trf' and 'dogbox' do not
        count function calls for numerical Jacobian approximation, as opposed
        to 'lm' method.
    njev : int or None
        Number of Jacobian evaluations done. If numerical Jacobian
        approximation is used in 'lm' method, it is set to None.
    status : int
        The reason for algorithm termination:

            * -1 : improper input parameters status returned from MINPACK.
            * 0 : the maximum number of function evaluations is exceeded.
            * 1 : `gtol` termination condition is satisfied.
            * 2 : `ftol` termination condition is satisfied.
            * 3 : `xtol` termination condition is satisfied.
            * 4 : both `ftol` and `xtol` termination conditions are satisfied.

    message : str
        Verbal description of the termination reason.
    success : bool
        True if one of the convergence criteria is satisfied (`status` > 0).

    See Also
    --------
    leastsq : A legacy wrapper for the MINPACK implementation of the
        Levenberg-Marquardt algorithm.
    curve_fit : Least-squares minimization applied to a curve-fitting problem.

    Notes
    -----
    Method 'lm' (Levenberg-Marquardt) calls a wrapper over least-squares
    algorithms implemented in MINPACK (lmder, lmdif). It runs the
    Levenberg-Marquardt algorithm formulated as a trust-region type algorithm.
    The implementation is based on paper [JJMore]_; it is very robust and
    efficient with a lot of smart tricks. It should be your first choice
    for unconstrained problems. Note that it doesn't support bounds. Also,
    it doesn't work when m < n.

    Method 'trf' (Trust Region Reflective) is motivated by the process of
    solving a system of equations, which constitute the first-order optimality
    condition for a bound-constrained minimization problem as formulated in
    [STIR]_. The algorithm iteratively solves trust-region subproblems
    augmented by a special diagonal quadratic term and with trust-region shape
    determined by the distance from the bounds and the direction of the
    gradient. These enhancements help to avoid making steps directly into
    bounds and efficiently explore the whole space of variables. To further
    improve convergence, the algorithm considers search directions reflected
    from the bounds. To obey theoretical requirements, the algorithm keeps
    iterates strictly feasible. With dense Jacobians trust-region subproblems
    are solved by an exact method very similar to the one described in
    [JJMore]_ (and implemented in MINPACK). The difference from the MINPACK
    implementation is that a singular value decomposition of a Jacobian
    matrix is done once per iteration, instead of a QR decomposition and series
    of Givens rotation eliminations. For large sparse Jacobians a 2-D subspace
    approach of solving trust-region subproblems is used [STIR]_, [Byrd]_.
    The subspace is spanned by a scaled gradient and an approximate
    Gauss-Newton solution delivered by `scipy.sparse.linalg.lsmr`. When no
    constraints are imposed the algorithm is very similar to MINPACK and has
    generally comparable performance. The algorithm works quite robustly in
    unbounded and bounded problems, thus it is chosen as a default algorithm.

    Method 'dogbox' operates in a trust-region framework, but considers
    rectangular trust regions as opposed to conventional ellipsoids [Voglis]_.
    The intersection of a current trust region and initial bounds is again
    rectangular, so on each iteration a quadratic minimization problem subject
    to bound constraints is solved approximately by Powell's dogleg method
    [NumOpt]_. The required Gauss-Newton step can be computed exactly for
    dense Jacobians or approximately by `scipy.sparse.linalg.lsmr` for large
    sparse Jacobians. The algorithm is likely to exhibit slow convergence when
    the rank of Jacobian is less than the number of variables. The algorithm
    often outperforms 'trf' in bounded problems with a small number of
    variables.

    Robust loss functions are implemented as described in [BA]_. The idea
    is to modify a residual vector and a Jacobian matrix on each iteration
    such that computed gradient and Gauss-Newton Hessian approximation match
    the true gradient and Hessian approximation of the cost function. Then
    the algorithm proceeds in a normal way, i.e., robust loss functions are
    implemented as a simple wrapper over standard least-squares algorithms.

    .. versionadded:: 0.17.0

    References
    ----------
    .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
              and Conjugate Gradient Method for Large-Scale Bound-Constrained
              Minimization Problems," SIAM Journal on Scientific Computing,
              Vol. 21, Number 1, pp 1-23, 1999.
    .. [NR] William H. Press et al., "Numerical Recipes. The Art of Scientific
            Computing. 3rd edition", Sec. 5.7.
    .. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate
              solution of the trust region problem by minimization over
              two-dimensional subspaces", Math. Programming, 40, pp. 247-263,
              1988.
    .. [Curtis] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
                sparse Jacobian matrices", Journal of the Institute of
                Mathematics and its Applications, 13, pp. 117-120, 1974.
    .. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation
                and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
                Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    .. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region
                Dogleg Approach for Unconstrained and Bound Constrained
                Nonlinear Optimization", WSEAS International Conference on
                Applied Mathematics, Corfu, Greece, 2004.
    .. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization,
                2nd edition", Chapter 4.
    .. [BA] B. Triggs et al., "Bundle Adjustment - A Modern Synthesis",
            Proceedings of the International Workshop on Vision Algorithms:
            Theory and Practice, pp. 298-372, 1999.

    Examples
    --------
    In this example we find a minimum of the Rosenbrock function without bounds
    on independent variables.

    >>> def fun_rosenbrock(x):
    ...     return np.array([10 * (x[1] - x[0]**2), (1 - x[0])])

    Notice that we only provide the vector of the residuals. The algorithm
    constructs the cost function as a sum of squares of the residuals, which
    gives the Rosenbrock function. The exact minimum is at ``x = [1.0, 1.0]``.

    >>> from scipy.optimize import least_squares
    >>> x0_rosenbrock = np.array([2, 2])
    >>> res_1 = least_squares(fun_rosenbrock, x0_rosenbrock)
    >>> res_1.x
    array([ 1.,  1.])
    >>> res_1.cost
    9.8669242910846867e-30
    >>> res_1.optimality
    8.8928864934219529e-14

    We now constrain the variables, in such a way that the previous solution
    becomes infeasible. Specifically, we require that ``x[1] >= 1.5``, and
    ``x[0]`` is left unconstrained. To this end, we specify the `bounds`
    parameter to `least_squares` in the form ``bounds=([-np.inf, 1.5], np.inf)``.

    We also provide the analytic Jacobian:

    >>> def jac_rosenbrock(x):
    ...     return np.array([
    ...         [-20 * x[0], 10],
    ...         [-1, 0]])

    Putting this all together, we see that the new solution lies on the bound:

    >>> res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock,
    ...                       bounds=([-np.inf, 1.5], np.inf))
    >>> res_2.x
    array([ 1.22437075,  1.5       ])
    >>> res_2.cost
    0.025213093946805685
    >>> res_2.optimality
    1.5885401433157753e-07

    Now we solve a system of equations (i.e., the cost function should be zero
    at a minimum) for a Broyden tridiagonal vector-valued function of 100000
    variables:

    >>> def fun_broyden(x):
    ...     f = (3 - x) * x + 1
    ...     f[1:] -= x[:-1]
    ...     f[:-1] -= 2 * x[1:]
    ...     return f

    The corresponding Jacobian matrix is sparse. We tell the algorithm to
    estimate it by finite differences and provide the sparsity structure of
    the Jacobian to significantly speed up this process.

    >>> from scipy.sparse import lil_matrix
    >>> def sparsity_broyden(n):
    ...     sparsity = lil_matrix((n, n), dtype=int)
    ...     i = np.arange(n)
    ...     sparsity[i, i] = 1
    ...     i = np.arange(1, n)
    ...     sparsity[i, i - 1] = 1
    ...     i = np.arange(n - 1)
    ...     sparsity[i, i + 1] = 1
    ...     return sparsity
    ...
    >>> n = 100000
    >>> x0_broyden = -np.ones(n)
    ...
    >>> res_3 = least_squares(fun_broyden, x0_broyden,
    ...                       jac_sparsity=sparsity_broyden(n))
    >>> res_3.cost
    4.5687069299604613e-23
    >>> res_3.optimality
    1.1650454296851518e-11

    Let's also solve a curve fitting problem using a robust loss function to
    take care of outliers in the data. Define the model function as
    ``y = a + b * exp(c * t)``, where t is a predictor variable, y is an
    observation and a, b, c are parameters to estimate.

    First, define the function which generates the data with noise and
    outliers, define the model parameters, and generate data:

    >>> def gen_data(t, a, b, c, noise=0, n_outliers=0, random_state=0):
    ...     y = a + b * np.exp(t * c)
    ...
    ...     rnd = np.random.RandomState(random_state)
    ...     error = noise * rnd.randn(t.size)
    ...     outliers = rnd.randint(0, t.size, n_outliers)
    ...     error[outliers] *= 10
    ...
    ...     return y + error
    ...
    >>> a = 0.5
    >>> b = 2.0
    >>> c = -1
    >>> t_min = 0
    >>> t_max = 10
    >>> n_points = 15
    ...
    >>> t_train = np.linspace(t_min, t_max, n_points)
    >>> y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

    Define the function for computing residuals and the initial estimate of
    the parameters.

    >>> def fun(x, t, y):
    ...     return x[0] + x[1] * np.exp(x[2] * t) - y
    ...
    >>> x0 = np.array([1.0, 1.0, 0.0])

    Compute a standard least-squares solution:

    >>> res_lsq = least_squares(fun, x0, args=(t_train, y_train))

    Now compute two solutions with two different robust loss functions. The
    parameter `f_scale` is set to 0.1, meaning that inlier residuals should
    not significantly exceed 0.1 (the noise level used).

    >>> res_soft_l1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1,
    ...                             args=(t_train, y_train))
    >>> res_log = least_squares(fun, x0, loss='cauchy', f_scale=0.1,
    ...                         args=(t_train, y_train))

    And, finally, plot all the curves. We see that by selecting an appropriate
    `loss` we can get estimates close to optimal even in the presence of
    strong outliers. But keep in mind that generally it is recommended to try
    'soft_l1' or 'huber' losses first (if at all necessary) as the other two
    options may cause difficulties in the optimization process.

    >>> t_test = np.linspace(t_min, t_max, n_points * 10)
    >>> y_true = gen_data(t_test, a, b, c)
    >>> y_lsq = gen_data(t_test, *res_lsq.x)
    >>> y_soft_l1 = gen_data(t_test, *res_soft_l1.x)
    >>> y_log = gen_data(t_test, *res_log.x)
    ...
    >>> import matplotlib.pyplot as plt
    >>> plt.plot(t_train, y_train, 'o')
    >>> plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
    >>> plt.plot(t_test, y_lsq, label='linear loss')
    >>> plt.plot(t_test, y_soft_l1, label='soft_l1 loss')
    >>> plt.plot(t_test, y_log, label='cauchy loss')
    >>> plt.xlabel("t")
    >>> plt.ylabel("y")
    >>> plt.legend()
    >>> plt.show()

    In the next example, we show how complex-valued residual functions of
    complex variables can be optimized with ``least_squares()``. Consider the
    following function:

    >>> def f(z):
    ...     return z - (0.5 + 0.5j)

    We wrap it into a function of real variables that returns real residuals
    by simply handling the real and imaginary parts as independent variables:

    >>> def f_wrap(x):
    ...     fx = f(x[0] + 1j*x[1])
    ...     return np.array([fx.real, fx.imag])

    Thus, instead of the original m-D complex function of n complex
    variables we optimize a 2m-D real function of 2n real variables:

    >>> from scipy.optimize import least_squares
    >>> res_wrapped = least_squares(f_wrap, (0.1, 0.1), bounds=([0, 0], [1, 1]))
    >>> z = res_wrapped.x[0] + res_wrapped.x[1]*1j
    >>> z
    (0.49999999999925893+0.49999999999925893j)

    """
    if method not in ['trf', 'dogbox', 'lm']:
        raise ValueError("`method` must be 'trf', 'dogbox' or 'lm'.")

    if jac not in ['2-point', '3-point', 'cs'] and not callable(jac):
        raise ValueError("`jac` must be '2-point', '3-point', 'cs' or "
                         "callable.")

    if tr_solver not in [None, 'exact', 'lsmr']:
        raise ValueError("`tr_solver` must be None, 'exact' or 'lsmr'.")

    if loss not in IMPLEMENTED_LOSSES and not callable(loss):
        raise ValueError("`loss` must be one of {0} or a callable."
                         .format(IMPLEMENTED_LOSSES.keys()))

    if method == 'lm' and loss != 'linear':
        raise ValueError("method='lm' supports only 'linear' loss function.")

    if verbose not in [0, 1, 2]:
        raise ValueError("`verbose` must be in [0, 1, 2].")

    if len(bounds) != 2:
        raise ValueError("`bounds` must contain 2 elements.")

    if max_nfev is not None and max_nfev <= 0:
        raise ValueError("`max_nfev` must be None or positive integer.")

    if np.iscomplexobj(x0):
        raise ValueError("`x0` must be real.")

    x0 = np.atleast_1d(x0).astype(float)

    if x0.ndim > 1:
        raise ValueError("`x0` must have at most 1 dimension.")

    lb, ub = prepare_bounds(bounds, x0.shape[0])

    if method == 'lm' and not np.all((lb == -np.inf) & (ub == np.inf)):
        raise ValueError("Method 'lm' doesn't support bounds.")

    if lb.shape != x0.shape or ub.shape != x0.shape:
        raise ValueError("Inconsistent shapes between bounds and `x0`.")

    if np.any(lb >= ub):
        raise ValueError("Each lower bound must be strictly less than each "
                         "upper bound.")

    if not in_bounds(x0, lb, ub):
        raise ValueError("`x0` is infeasible.")

    x_scale = check_x_scale(x_scale, x0)

    ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method)

    def fun_wrapped(x):
        return np.atleast_1d(fun(x, *args, **kwargs))

    if method == 'trf':
        x0 = make_strictly_feasible(x0, lb, ub)

    f0 = fun_wrapped(x0)

    if f0.ndim != 1:
        raise ValueError("`fun` must return at most 1-d array_like. "
                         "f0.shape: {0}".format(f0.shape))

    if not np.all(np.isfinite(f0)):
        raise ValueError("Residuals are not finite in the initial point.")

    n = x0.size
    m = f0.size

    if method == 'lm' and m < n:
        raise ValueError("Method 'lm' doesn't work when the number of "
                         "residuals is less than the number of variables.")

    loss_function = construct_loss_function(m, loss, f_scale)
    if callable(loss):
        rho = loss_function(f0)
        if rho.shape != (3, m):
            raise ValueError("The return value of `loss` callable has wrong "
                             "shape.")
        initial_cost = 0.5 * np.sum(rho[0])
    elif loss_function is not None:
        initial_cost = loss_function(f0, cost_only=True)
    else:
        initial_cost = 0.5 * np.dot(f0, f0)

    if callable(jac):
        J0 = jac(x0, *args, **kwargs)

        if issparse(J0):
            J0 = csr_matrix(J0)

            def jac_wrapped(x, _=None):
                return csr_matrix(jac(x, *args, **kwargs))

        elif isinstance(J0, LinearOperator):
            def jac_wrapped(x, _=None):
                return jac(x, *args, **kwargs)

        else:
            J0 = np.atleast_2d(J0)

            def jac_wrapped(x, _=None):
                return np.atleast_2d(jac(x, *args, **kwargs))

    else:  # Estimate Jacobian by finite differences.
        if method == 'lm':
            if jac_sparsity is not None:
                raise ValueError("method='lm' does not support "
                                 "`jac_sparsity`.")

            if jac != '2-point':
                warn("jac='{0}' works equivalently to '2-point' "
                     "for method='lm'.".format(jac))

            J0 = jac_wrapped = None
        else:
            if jac_sparsity is not None and tr_solver == 'exact':
                raise ValueError("tr_solver='exact' is incompatible "
                                 "with `jac_sparsity`.")

            jac_sparsity = check_jac_sparsity(jac_sparsity, m, n)

            def jac_wrapped(x, f):
                J = approx_derivative(fun, x, rel_step=diff_step, method=jac,
                                      f0=f, bounds=bounds, args=args,
                                      kwargs=kwargs, sparsity=jac_sparsity)
                if J.ndim != 2:  # J is guaranteed not sparse.
                    J = np.atleast_2d(J)

                return J

            J0 = jac_wrapped(x0, f0)

    if J0 is not None:
        if J0.shape != (m, n):
            raise ValueError(
                "The return value of `jac` has wrong shape: expected {0}, "
                "actual {1}.".format((m, n), J0.shape))

        if not isinstance(J0, np.ndarray):
            if method == 'lm':
                raise ValueError("method='lm' works only with dense "
                                 "Jacobian matrices.")

            if tr_solver == 'exact':
                raise ValueError(
                    "tr_solver='exact' works only with dense "
                    "Jacobian matrices.")

        jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
        if isinstance(J0, LinearOperator) and jac_scale:
            raise ValueError("x_scale='jac' can't be used when `jac` "
                             "returns LinearOperator.")

        if tr_solver is None:
            if isinstance(J0, np.ndarray):
                tr_solver = 'exact'
            else:
                tr_solver = 'lsmr'

    if method == 'lm':
        result = call_minpack(fun_wrapped, x0, jac_wrapped, ftol, xtol, gtol,
                              max_nfev, x_scale, diff_step)

    elif method == 'trf':
        result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol,
                     gtol, max_nfev, x_scale, loss_function, tr_solver,
                     tr_options.copy(), verbose)

    elif method == 'dogbox':
        if tr_solver == 'lsmr' and 'regularize' in tr_options:
            warn("The keyword 'regularize' in `tr_options` is not relevant "
                 "for 'dogbox' method.")
            tr_options = tr_options.copy()
            del tr_options['regularize']

        result = dogbox(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol,
                        xtol, gtol, max_nfev, x_scale, loss_function,
                        tr_solver, tr_options, verbose)

    result.message = TERMINATION_MESSAGES[result.status]
    result.success = result.status > 0

    if verbose >= 1:
        print(result.message)
        print("Function evaluations {0}, initial cost {1:.4e}, final cost "
              "{2:.4e}, first-order optimality {3:.2e}."
              .format(result.nfev, initial_cost, result.cost,
                      result.optimality))

    return result