1"""
2Functions
3---------
4.. autosummary::
5 :toctree: generated/
7 fmin_l_bfgs_b
9"""
## License for the Python wrapper
## ==============================

## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca>

## Permission is hereby granted, free of charge, to any person obtaining a
## copy of this software and associated documentation files (the "Software"),
## to deal in the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:

## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.

## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS IN THE SOFTWARE.

## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy
import numpy as np
from numpy import array, asarray, float64, zeros
from . import _lbfgsb
from .optimize import (MemoizeJac, OptimizeResult,
                       _check_unknown_options, _prepare_scalar_function)
from ._constraints import old_bound_to_new

from scipy.sparse.linalg import LinearOperator

__all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']


def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
                  approx_grad=0,
                  bounds=None, m=10, factr=1e7, pgtol=1e-5,
                  epsilon=1e-8,
                  iprint=-1, maxfun=15000, maxiter=15000, disp=None,
                  callback=None, maxls=20):
54 """
55 Minimize a function func using the L-BFGS-B algorithm.
    Parameters
    ----------
    func : callable f(x,*args)
        Function to minimize.
    x0 : ndarray
        Initial guess.
    fprime : callable fprime(x,*args), optional
        The gradient of `func`. If None, then `func` returns the function
        value and the gradient (``f, g = func(x, *args)``), unless
        `approx_grad` is True in which case `func` returns only ``f``.
    args : sequence, optional
        Arguments to pass to `func` and `fprime`.
    approx_grad : bool, optional
        Whether to approximate the gradient numerically (in which case
        `func` returns only the function value).
    bounds : list, optional
        ``(min, max)`` pairs for each element in ``x``, defining
        the bounds on that parameter. Use None or +-inf for one of ``min`` or
        ``max`` when there is no bound in that direction.
    m : int, optional
        The maximum number of variable metric corrections
        used to define the limited memory matrix. (The limited memory BFGS
        method does not store the full hessian but uses this many terms in an
        approximation to it.)
    factr : float, optional
        The iteration stops when
        ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
        where ``eps`` is the machine precision, which is automatically
        generated by the code. Typical values for `factr` are: 1e12 for
        low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
        high accuracy. See Notes for relationship to `ftol`, which is exposed
        (instead of `factr`) by the `scipy.optimize.minimize` interface to
        L-BFGS-B.
    pgtol : float, optional
        The iteration will stop when
        ``max{|proj g_i | i = 1, ..., n} <= pgtol``
        where ``proj g_i`` is the i-th component of the projected gradient.
    epsilon : float, optional
        Step size used when `approx_grad` is True, for numerically
        calculating the gradient.
    iprint : int, optional
        Controls the frequency of output. ``iprint < 0`` means no output;
        ``iprint = 0`` print only one line at the last iteration;
        ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
        ``iprint = 99`` print details of every iteration except n-vectors;
        ``iprint = 100`` print also the changes of active set and final x;
        ``iprint > 100`` print details of every iteration including x and g.
    disp : int, optional
        If zero, then no output. If a positive number, then this over-rides
        `iprint` (i.e., `iprint` gets the value of `disp`).
    maxfun : int, optional
        Maximum number of function evaluations.
    maxiter : int, optional
        Maximum number of iterations.
    callback : callable, optional
        Called after each iteration, as ``callback(xk)``, where ``xk`` is the
        current parameter vector.
    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.

    Returns
    -------
    x : array_like
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    d : dict
        Information dictionary.

        * d['warnflag'] is

          - 0 if converged,
          - 1 if too many function evaluations or too many iterations,
          - 2 if stopped for another reason, given in d['task']

        * d['grad'] is the gradient at the minimum (should be 0 ish)
        * d['funcalls'] is the number of function calls made.
        * d['nit'] is the number of iterations.

    See also
    --------
    minimize: Interface to minimization algorithms for multivariate
        functions. See the 'L-BFGS-B' `method` in particular. Note that the
        `ftol` option is made available via that interface, while `factr` is
        provided via this interface, where `factr` is the factor multiplying
        the default machine floating-point precision to arrive at `ftol`:
        ``ftol = factr * numpy.finfo(float).eps``.

    Notes
    -----
    License of L-BFGS-B (FORTRAN code):

    The version included here (in fortran code) is 3.0
    (released April 25, 2011). It was written by Ciyou Zhu, Richard Byrd,
    and Jorge Nocedal <nocedal@ece.nwu.edu>. It carries the following
    condition for use:

    This software is freely available, but we expect that all publications
    describing work using this software, or all commercial products using it,
    quote at least one of the references given below. This software is released
    under the BSD License.

    References
    ----------
    * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
      Constrained Optimization, (1995), SIAM Journal on Scientific and
      Statistical Computing, 16, 5, pp. 1190-1208.
    * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
      FORTRAN routines for large scale bound constrained optimization (1997),
      ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
    * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
      FORTRAN routines for large scale bound constrained optimization (2011),
      ACM Transactions on Mathematical Software, 38, 1.
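
    Examples
    --------
    A minimal usage sketch (the quadratic objective and its gradient below
    are illustrative inputs, not part of this module):

    >>> import numpy as np
    >>> from scipy.optimize import fmin_l_bfgs_b
    >>> def f(x):
    ...     return (x[0] - 1.0)**2 + (x[1] - 2.5)**2
    >>> def grad(x):
    ...     return np.array([2.0 * (x[0] - 1.0), 2.0 * (x[1] - 2.5)])
    >>> x, fval, info = fmin_l_bfgs_b(f, np.zeros(2), fprime=grad,
    ...                               bounds=[(0, None), (0, None)])
    >>> bool(np.allclose(x, [1.0, 2.5]))
    True
    >>> info['warnflag']
    0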
171 """
    # handle fprime/approx_grad
    if approx_grad:
        fun = func
        jac = None
    elif fprime is None:
        fun = MemoizeJac(func)
        jac = fun.derivative
    else:
        fun = func
        jac = fprime

    # build options
    if disp is None:
        disp = iprint
    opts = {'disp': disp,
            'iprint': iprint,
            'maxcor': m,
            'ftol': factr * np.finfo(float).eps,
            'gtol': pgtol,
            'eps': epsilon,
            'maxfun': maxfun,
            'maxiter': maxiter,
            'callback': callback,
            'maxls': maxls}

    res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
                           **opts)
    d = {'grad': res['jac'],
         'task': res['message'],
         'funcalls': res['nfev'],
         'nit': res['nit'],
         'warnflag': res['status']}
    f = res['fun']
    x = res['x']

    return x, f, d


def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
                     disp=None, maxcor=10, ftol=2.2204460492503131e-09,
                     gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
                     iprint=-1, callback=None, maxls=20,
                     finite_diff_rel_step=None, **unknown_options):
215 """
216 Minimize a scalar function of one or more variables using the L-BFGS-B
217 algorithm.
219 Options
220 -------
    disp : None or int
        If `disp is None` (the default), then the supplied version of `iprint`
        is used. If `disp is not None`, then it overrides the supplied version
        of `iprint`: a value of 0 suppresses all output, and any other value
        is used as the new `iprint`.
    maxcor : int
        The maximum number of variable metric corrections used to
        define the limited memory matrix. (The limited memory BFGS
        method does not store the full hessian but uses this many terms
        in an approximation to it.)
    ftol : float
        The iteration stops when ``(f^k -
        f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
    gtol : float
        The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
        <= gtol`` where ``proj g_i`` is the i-th component of the
        projected gradient.
    eps : float or ndarray
        If `jac is None` the absolute step size used for numerical
        approximation of the jacobian via forward differences.
    maxfun : int
        Maximum number of function evaluations.
    maxiter : int
        Maximum number of iterations.
    iprint : int, optional
        Controls the frequency of output. ``iprint < 0`` means no output;
        ``iprint = 0`` print only one line at the last iteration;
        ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
        ``iprint = 99`` print details of every iteration except n-vectors;
        ``iprint = 100`` print also the changes of active set and final x;
        ``iprint > 100`` print details of every iteration including x and g.
    callback : callable, optional
        Called after each iteration, as ``callback(xk)``, where ``xk`` is the
        current parameter vector.
    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.
    finite_diff_rel_step : None or array_like, optional
        If `jac in ['2-point', '3-point', 'cs']` the relative step size to
        use for numerical approximation of the jacobian. The absolute step
        size is computed as ``h = rel_step * sign(x0) * max(1, abs(x0))``,
        possibly adjusted to fit into the bounds. For ``method='3-point'``
        the sign of `h` is ignored. If None (default) then step is selected
        automatically.

    Notes
    -----
    The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
    but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
    relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
    I.e., `factr` multiplies the default machine floating-point precision to
    arrive at `ftol`.
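
    Examples
    --------
    This helper is normally reached through `scipy.optimize.minimize` with
    ``method='L-BFGS-B'``. An illustrative sketch (the objective below is an
    example input, not part of this module):

    >>> import numpy as np
    >>> from scipy.optimize import minimize
    >>> def f(x):
    ...     return np.sum((x - 3.0)**2)
    >>> res = minimize(f, np.zeros(3), method='L-BFGS-B',
    ...                bounds=[(0, 10)] * 3,
    ...                options={'ftol': 1e-10, 'gtol': 1e-8})
    >>> res.success
    True
    >>> bool(np.allclose(res.x, 3.0))
    True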
272 """
    _check_unknown_options(unknown_options)
    m = maxcor
    pgtol = gtol
    factr = ftol / np.finfo(float).eps

    x0 = asarray(x0).ravel()
    n, = x0.shape

    if bounds is None:
        bounds = [(None, None)] * n
    if len(bounds) != n:
        raise ValueError('length of x0 != length of bounds')

    # unbounded variables must use None, not +-inf, for optimizer to work properly
    bounds = [(None if l == -np.inf else l, None if u == np.inf else u) for l, u in bounds]
    # LBFGSB is sent 'old-style' bounds, 'new-style' bounds are required by
    # approx_derivative and ScalarFunction
    new_bounds = old_bound_to_new(bounds)

    # check bounds
    if (new_bounds[0] > new_bounds[1]).any():
        raise ValueError("LBFGSB - one of the lower bounds is greater than an upper bound.")

    # initial vector must lie within the bounds. Otherwise ScalarFunction and
    # approx_derivative will cause problems
    x0 = np.clip(x0, new_bounds[0], new_bounds[1])

    if disp is not None:
        if disp == 0:
            iprint = -1
        else:
            iprint = disp

    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
                                  bounds=new_bounds,
                                  finite_diff_rel_step=finite_diff_rel_step)

    func_and_grad = sf.fun_and_grad

    fortran_int = _lbfgsb.types.intvar.dtype

    nbd = zeros(n, fortran_int)
    low_bnd = zeros(n, float64)
    upper_bnd = zeros(n, float64)
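    # nbd[i] tells the Fortran routine which bounds apply to x[i]:
    # 0 - unbounded, 1 - lower bound only, 2 - both bounds,
    # 3 - upper bound only (see the setulb documentation in lbfgsb.f).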
    bounds_map = {(None, None): 0,
                  (1, None): 1,
                  (1, 1): 2,
                  (None, 1): 3}
    for i in range(0, n):
        l, u = bounds[i]
        if l is not None:
            low_bnd[i] = l
            l = 1
        if u is not None:
            upper_bnd[i] = u
            u = 1
        nbd[i] = bounds_map[l, u]

    if not maxls > 0:
        raise ValueError('maxls must be positive.')

    x = array(x0, float64)
    f = array(0.0, float64)
    g = zeros((n,), float64)
    wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
    iwa = zeros(3*n, fortran_int)
    task = zeros(1, 'S60')
    csave = zeros(1, 'S60')
    lsave = zeros(4, fortran_int)
    isave = zeros(44, fortran_int)
    dsave = zeros(29, float64)

    task[:] = 'START'

    n_iterations = 0

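    # setulb uses a reverse-communication interface: each call returns with
    # `task` set to a request for new f and g values ('FG'), a completed
    # iteration ('NEW_X'), or a convergence/stop message.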
    while 1:
        # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
        _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
                       pgtol, wa, iwa, task, iprint, csave, lsave,
                       isave, dsave, maxls)
        task_str = task.tobytes()
        if task_str.startswith(b'FG'):
            # The minimization routine wants f and g at the current x.
            # Note that interruptions due to maxfun are postponed
            # until the completion of the current minimization iteration.
            # Overwrite f and g:
            f, g = func_and_grad(x)
        elif task_str.startswith(b'NEW_X'):
            # new iteration
            n_iterations += 1
            if callback is not None:
                callback(np.copy(x))

            if n_iterations >= maxiter:
                task[:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
            elif sf.nfev > maxfun:
                task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
                           'EXCEEDS LIMIT')
        else:
            break

    task_str = task.tobytes().strip(b'\x00').strip()
    if task_str.startswith(b'CONV'):
        warnflag = 0
    elif sf.nfev > maxfun or n_iterations >= maxiter:
        warnflag = 1
    else:
        warnflag = 2

    # These two portions of the workspace are described in the mainlb
    # subroutine in lbfgsb.f. See line 363.
    s = wa[0: m*n].reshape(m, n)
    y = wa[m*n: 2*m*n].reshape(m, n)

    # See lbfgsb.f line 160 for this portion of the workspace.
    # isave(31) = the total number of BFGS updates prior to the current
    # iteration.
    n_bfgs_updates = isave[30]

    n_corrs = min(n_bfgs_updates, maxcor)
    hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])

    return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
                          njev=sf.ngev,
                          nit=n_iterations, status=warnflag, message=task_str,
                          x=x, success=(warnflag == 0), hess_inv=hess_inv)


class LbfgsInvHessProduct(LinearOperator):
    """Linear operator for the L-BFGS approximate inverse Hessian.

    This operator computes the product of a vector with the approximate inverse
    of the Hessian of the objective function, using the L-BFGS limited
    memory approximation to the inverse Hessian, accumulated during the
    optimization.

    Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
    interface.

    Parameters
    ----------
    sk : array_like, shape=(n_corr, n)
        Array of `n_corr` most recent updates to the solution vector.
        (See [1]).
    yk : array_like, shape=(n_corr, n)
        Array of `n_corr` most recent updates to the gradient. (See [1]).

    References
    ----------
    .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
       storage." Mathematics of computation 35.151 (1980): 773-782.
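
    Examples
    --------
    An illustrative sketch with small made-up correction pairs (any
    ``sk``/``yk`` arrays whose row-wise inner products are positive would do):

    >>> import numpy as np
    >>> from scipy.optimize import LbfgsInvHessProduct
    >>> rng = np.random.RandomState(0)
    >>> sk = rng.rand(2, 4)
    >>> yk = rng.rand(2, 4)
    >>> hess_inv = LbfgsInvHessProduct(sk, yk)
    >>> v = np.ones(4)
    >>> bool(np.allclose(hess_inv.matvec(v), hess_inv.todense().dot(v)))
    True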
425 """
    def __init__(self, sk, yk):
        """Construct the operator."""
        if sk.shape != yk.shape or sk.ndim != 2:
            raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
        n_corrs, n = sk.shape

        super(LbfgsInvHessProduct, self).__init__(
            dtype=np.float64, shape=(n, n))

        self.sk = sk
        self.yk = yk
        self.n_corrs = n_corrs
        # rho[i] = 1 / (y_i . s_i), the curvature scalars used by the
        # BFGS update.
        self.rho = 1 / np.einsum('ij,ij->i', sk, yk)

    def _matvec(self, x):
        """Efficient matrix-vector multiply with the BFGS matrices.

        This calculation is described in Section (4) of [1].

        Parameters
        ----------
        x : ndarray
            An array with shape (n,) or (n,1).

        Returns
        -------
        y : ndarray
            The matrix-vector product.

        """
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        q = np.array(x, dtype=self.dtype, copy=True)
        if q.ndim == 2 and q.shape[1] == 1:
            q = q.reshape(-1)

        alpha = np.zeros(n_corrs)

        # First (backward) pass of the two-loop recursion, from the most
        # recent correction pair to the oldest.
        for i in range(n_corrs-1, -1, -1):
            alpha[i] = rho[i] * np.dot(s[i], q)
            q = q - alpha[i]*y[i]

        # Second (forward) pass; the initial inverse Hessian is the identity,
        # so r starts out equal to q.
        r = q
        for i in range(n_corrs):
            beta = rho[i] * np.dot(y[i], r)
            r = r + s[i] * (alpha[i] - beta)

        return r

    def todense(self):
        """Return a dense array representation of this operator.

        Returns
        -------
        arr : ndarray, shape=(n, n)
            An array with the same shape and containing
            the same data represented by this `LinearOperator`.

        """
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        I = np.eye(*self.shape, dtype=self.dtype)
        Hk = I

        # Apply the BFGS update of [1] once per stored correction pair:
        # H_{k+1} = (I - rho s y^T) H_k (I - rho y s^T) + rho s s^T.
        for i in range(n_corrs):
            A1 = I - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
            A2 = I - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]

            Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *
                                               s[i][np.newaxis, :])
        return Hk