1"""
2Functions that are general enough to use for any model fitting. The idea is
3to untie these from LikelihoodModel so that they may be re-used generally.
4"""
6import numpy as np
7from scipy import optimize


def _check_method(method, methods):
    if method not in methods:
        message = "Unknown fit method %s" % method
        raise ValueError(message)


class Optimizer(object):
    def _fit(self, objective, gradient, start_params, fargs, kwargs,
             hessian=None, method='newton', maxiter=100, full_output=True,
             disp=True, callback=None, retall=False):
20 """
21 Fit function for any model with an objective function.
23 Parameters
24 ----------
25 start_params : array_like, optional
26 Initial guess of the solution for the loglikelihood maximization.
27 The default is an array of zeros.
28 method : str {'newton','nm','bfgs','powell','cg','ncg','basinhopping',
29 'minimize'}
30 Method can be 'newton' for Newton-Raphson, 'nm' for Nelder-Mead,
31 'bfgs' for Broyden-Fletcher-Goldfarb-Shanno, 'powell' for modified
32 Powell's method, 'cg' for conjugate gradient, 'ncg' for Newton-
33 conjugate gradient, 'basinhopping' for global basin-hopping
34 solver, if available or a generic 'minimize' which is a wrapper for
35 scipy.optimize.minimize. `method` determines which solver from
36 scipy.optimize is used. The explicit arguments in `fit` are passed
37 to the solver, with the exception of the basin-hopping solver. Each
38 solver has several optional arguments that are not the same across
39 solvers. See the notes section below (or scipy.optimize) for the
40 available arguments and for the list of explicit arguments that the
41 basin-hopping solver supports..
        maxiter : int
            The maximum number of iterations to perform.
        full_output : bool
            Set to True to have all available output in the Results object's
            mle_retvals attribute. The output is dependent on the solver.
            See LikelihoodModelResults notes section for more information.
        disp : bool
            Set to True to print convergence messages.
        fargs : tuple
            Extra arguments passed to the likelihood function, i.e.,
            loglike(x, *args).
        callback : callable callback(xk)
            Called after each iteration, as callback(xk), where xk is the
            current parameter vector.
        retall : bool
            Set to True to return list of solutions at each iteration.
            Available in Results object's mle_retvals attribute.

        Returns
        -------
        xopt : ndarray
            The solution to the objective function.
        retvals : dict, None
            If `full_output` is True then this is a dictionary which holds
            information returned from the solver used. If it is False, this is
            None.
        optim_settings : dict
            A dictionary that contains the parameters passed to the solver.

        Notes
        -----
        The 'basinhopping' solver ignores the `maxiter`, `retall`, and
        `full_output` explicit arguments.

        Optional arguments for the solvers (available in
        Results.mle_settings)::

            'newton'
                tol : float
                    Relative error in params acceptable for convergence.
            'nm' -- Nelder-Mead
                xtol : float
                    Relative error in params acceptable for convergence
                ftol : float
                    Relative error in loglike(params) acceptable for
                    convergence
                maxfun : int
                    Maximum number of function evaluations to make.
            'bfgs'
                gtol : float
                    Stop when the norm of the gradient is less than gtol.
                norm : float
                    Order of norm (np.Inf is max, -np.Inf is min)
                epsilon
                    If fprime is approximated, use this value for the step
                    size. Only relevant if LikelihoodModel.score is None.
            'lbfgs'
                m : int
                    The maximum number of variable metric corrections used to
                    define the limited memory matrix. (The limited memory BFGS
                    method does not store the full Hessian but uses this many
                    terms in an approximation to it.)
                pgtol : float
                    The iteration will stop when
                    ``max{|proj g_i | i = 1, ..., n} <= pgtol`` where pg_i is
                    the i-th component of the projected gradient.
                factr : float
                    The iteration stops when
                    ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
                    where eps is the machine precision, which is automatically
                    generated by the code. Typical values for factr are: 1e12
                    for low accuracy; 1e7 for moderate accuracy; 10.0 for
                    extremely high accuracy. See Notes for the relationship to
                    ftol, which is exposed (instead of factr) by the
                    scipy.optimize.minimize interface to L-BFGS-B.
                maxfun : int
                    Maximum number of function evaluations to make.
                epsilon : float
                    Step size used when approx_grad is True, for numerically
                    calculating the gradient.
                approx_grad : bool
                    Whether to approximate the gradient numerically (in which
                    case func returns only the function value).
            'cg'
                gtol : float
                    Stop when the norm of the gradient is less than gtol.
                norm : float
                    Order of norm (np.Inf is max, -np.Inf is min)
                epsilon : float
                    If fprime is approximated, use this value for the step
                    size. Can be scalar or vector. Only relevant if
                    LikelihoodModel.score is None.
            'ncg'
                fhess_p : callable f'(x, *args)
                    Function which computes the Hessian of f times an
                    arbitrary vector, p. Should only be supplied if
                    LikelihoodModel.hessian is None.
                avextol : float
                    Stop when the average relative error in the minimizer
                    falls below this amount.
                epsilon : float or ndarray
                    If fhess is approximated, use this value for the step
                    size. Only relevant if LikelihoodModel.hessian is None.
            'powell'
                xtol : float
                    Line-search error tolerance
                ftol : float
                    Relative error in loglike(params) acceptable for
                    convergence.
                maxfun : int
                    Maximum number of function evaluations to make.
                start_direc : ndarray
                    Initial direction set.
            'basinhopping'
                niter : int
                    The number of basin-hopping iterations.
                niter_success : int
                    Stop the run if the global minimum candidate remains the
                    same for this number of iterations.
                T : float
                    The "temperature" parameter for the accept-or-reject
                    criterion. Higher "temperatures" mean that larger jumps
                    in function value will be accepted. For best results
                    `T` should be comparable to the separation (in function
                    value) between local minima.
                stepsize : float
                    Initial step size for use in the random displacement.
                interval : int
                    The interval for how often to update the `stepsize`.
                minimizer : dict
                    Extra keyword arguments to be passed to the minimizer
                    `scipy.optimize.minimize()`, for example 'method' - the
                    minimization method (e.g. 'L-BFGS-B'), or 'tol' - the
                    tolerance for termination. Other arguments are mapped
                    from the explicit arguments of `fit`:
                    - `args` <- `fargs`
                    - `jac` <- `score`
                    - `hess` <- `hess`
            'minimize'
                min_method : str, optional
                    Name of the minimization method to use.
                    Any method-specific arguments can be passed directly.
                    For a list of methods and their arguments, see the
                    documentation of `scipy.optimize.minimize`.
                    If no method is specified, then BFGS is used.
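
        Examples
        --------
        A minimal sketch of calling this method directly, assuming
        ``neg_loglike`` and ``neg_score`` are callables returning the negative
        log-likelihood and its gradient (both hypothetical names here)::

            opt = Optimizer()
            xopt, retvals, settings = opt._fit(
                neg_loglike, neg_score, start_params=np.zeros(2), fargs=(),
                kwargs={}, method='bfgs', disp=False)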
186 """
        # TODO: generalize the regularization stuff
        # Extract kwargs specific to fit_regularized calling fit
        extra_fit_funcs = kwargs.setdefault('extra_fit_funcs', dict())

        methods = ['newton', 'nm', 'bfgs', 'lbfgs', 'powell', 'cg', 'ncg',
                   'basinhopping', 'minimize']
        methods += extra_fit_funcs.keys()
        method = method.lower()
        _check_method(method, methods)

        fit_funcs = {
            'newton': _fit_newton,
            'nm': _fit_nm,  # Nelder-Mead
            'bfgs': _fit_bfgs,
            'lbfgs': _fit_lbfgs,
            'cg': _fit_cg,
            'ncg': _fit_ncg,
            'powell': _fit_powell,
            'basinhopping': _fit_basinhopping,
            'minimize': _fit_minimize  # wrapper for scipy.optimize.minimize
        }

        # NOTE: fit_regularized checks the methods for these but it should be
        # moved up probably
        if extra_fit_funcs:
            fit_funcs.update(extra_fit_funcs)

        func = fit_funcs[method]
        xopt, retvals = func(objective, gradient, start_params, fargs, kwargs,
                             disp=disp, maxiter=maxiter, callback=callback,
                             retall=retall, full_output=full_output,
                             hess=hessian)

        optim_settings = {'optimizer': method, 'start_params': start_params,
                          'maxiter': maxiter, 'full_output': full_output,
                          'disp': disp, 'fargs': fargs, 'callback': callback,
                          'retall': retall}
        optim_settings.update(kwargs)
        # set as attributes or return?
        return xopt, retvals, optim_settings
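
    # A hedged illustration of the interface assumed for solvers registered
    # through ``kwargs['extra_fit_funcs']``: each entry is called with the
    # same signature as the module-level helpers below and must return
    # ``(xopt, retvals)``, e.g. a hypothetical
    #
    #     def _fit_custom(objective, gradient, start_params, fargs, kwargs,
    #                     disp=True, maxiter=100, callback=None, retall=False,
    #                     full_output=True, hess=None):
    #         ...
    #         return xopt, retvals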

    def _fit_constrained(self, params):
        """
        TODO: how to add constraints?

        Something like
        sm.add_constraint(Model, func)

        or

        model_instance.add_constraint(func)
        model_instance.add_constraint("x1 + x2 = 2")
        result = model_instance.fit()
        """
        raise NotImplementedError

    def _fit_regularized(self, params):
        # TODO: code will not necessarily be general here. 3 options.
        # 1) setup for scipy.optimize.fmin_slsqp
        # 2) setup for cvxopt
        # 3) setup for openopt
        raise NotImplementedError


########################################
# Helper functions to fit


def _fit_minimize(f, score, start_params, fargs, kwargs, disp=True,
                  maxiter=100, callback=None, retall=False,
                  full_output=True, hess=None):
    kwargs.setdefault('min_method', 'BFGS')

    # prepare options dict for minimize
    filter_opts = ['extra_fit_funcs', 'niter', 'min_method', 'tol']
    options = dict((k, v) for k, v in kwargs.items() if k not in filter_opts)
    options['disp'] = disp
    options['maxiter'] = maxiter

    # Use Hessian/Jacobian only if they're required by the method
    no_hess = ['Nelder-Mead', 'Powell', 'CG', 'BFGS', 'COBYLA', 'SLSQP']
    no_jac = ['Nelder-Mead', 'Powell', 'COBYLA']
    if kwargs['min_method'] in no_hess:
        hess = None
    if kwargs['min_method'] in no_jac:
        score = None

    res = optimize.minimize(f, start_params, args=fargs,
                            method=kwargs['min_method'], jac=score,
                            hess=hess, callback=callback, options=options)

    xopt = res.x
    retvals = None
    if full_output:
        nit = getattr(res, 'nit', np.nan)  # scipy 0.14 compat
        retvals = {'fopt': res.fun, 'iterations': nit,
                   'fcalls': res.nfev, 'warnflag': res.status,
                   'converged': res.success}
        if retall:
            retvals.update({'allvecs': res.values()})

    return xopt, retvals
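
# A hedged illustration of the mapping above: with
# ``kwargs={'min_method': 'Nelder-Mead', 'xatol': 1e-4}`` (hypothetical
# values) the wrapper ends up calling roughly
#
#     optimize.minimize(f, start_params, args=fargs, method='Nelder-Mead',
#                       jac=None, hess=None, callback=callback,
#                       options={'xatol': 1e-4, 'disp': disp,
#                                'maxiter': maxiter})
#
# since 'Nelder-Mead' appears in both ``no_hess`` and ``no_jac``.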


def _fit_newton(f, score, start_params, fargs, kwargs, disp=True,
                maxiter=100, callback=None, retall=False,
                full_output=True, hess=None, ridge_factor=1e-10):
    tol = kwargs.setdefault('tol', 1e-8)
    iterations = 0
    oldparams = np.inf
    newparams = np.asarray(start_params)
    if retall:
        history = [oldparams, newparams]
    while (iterations < maxiter and np.any(np.abs(newparams -
            oldparams) > tol)):
        H = np.asarray(hess(newparams))
        # regularize Hessian, not clear what ridge factor should be
        # keyword option with absolute default 1e-10, see #1847
        if not np.all(ridge_factor == 0):
            H[np.diag_indices(H.shape[0])] += ridge_factor
        oldparams = newparams
        newparams = oldparams - np.dot(np.linalg.inv(H),
                                       score(oldparams))
        if retall:
            history.append(newparams)
        if callback is not None:
            callback(newparams)
        iterations += 1
    fval = f(newparams, *fargs)  # this is the negative likelihood
    if iterations == maxiter:
        warnflag = 1
        if disp:
            print("Warning: Maximum number of iterations has been "
                  "exceeded.")
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % iterations)
    else:
        warnflag = 0
        if disp:
            print("Optimization terminated successfully.")
            print("         Current function value: %f" % fval)
            print("         Iterations %d" % iterations)
    if full_output:
        (xopt, fopt, niter,
         gopt, hopt) = (newparams, f(newparams, *fargs),
                        iterations, score(newparams),
                        hess(newparams))
        converged = not warnflag
        retvals = {'fopt': fopt, 'iterations': niter, 'score': gopt,
                   'Hessian': hopt, 'warnflag': warnflag,
                   'converged': converged}
        if retall:
            retvals.update({'allvecs': history})
    else:
        xopt = newparams
        retvals = None

    return xopt, retvals
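
# A hedged, self-contained sketch of the update performed in the loop above,
# on a toy quadratic f(x) = 0.5 * x'Ax - b'x (A, b and x below are
# hypothetical illustration values, not used by this module). Each iteration
# solves the ridge-regularized Newton system
# x_new = x_old - inv(H + ridge_factor*I) @ grad:
#
#     A = np.array([[3.0, 1.0], [1.0, 2.0]])   # Hessian of the quadratic
#     b = np.array([1.0, 1.0])
#     x = np.zeros(2)
#     grad = A.dot(x) - b                       # score of the objective
#     H = A + 1e-10 * np.eye(2)                 # ridge_factor regularization
#     x = x - np.linalg.solve(H, grad)          # one Newton step; exact here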


def _fit_bfgs(f, score, start_params, fargs, kwargs, disp=True,
              maxiter=100, callback=None, retall=False,
              full_output=True, hess=None):
    gtol = kwargs.setdefault('gtol', 1.0000000000000001e-05)
    norm = kwargs.setdefault('norm', np.Inf)
    epsilon = kwargs.setdefault('epsilon', 1.4901161193847656e-08)
    retvals = optimize.fmin_bfgs(f, start_params, score, args=fargs,
                                 gtol=gtol, norm=norm, epsilon=epsilon,
                                 maxiter=maxiter, full_output=full_output,
                                 disp=disp, retall=retall, callback=callback)
    if full_output:
        if not retall:
            xopt, fopt, gopt, Hinv, fcalls, gcalls, warnflag = retvals
        else:
            (xopt, fopt, gopt, Hinv, fcalls,
             gcalls, warnflag, allvecs) = retvals
        converged = not warnflag
        retvals = {'fopt': fopt, 'gopt': gopt, 'Hinv': Hinv,
                   'fcalls': fcalls, 'gcalls': gcalls,
                   'warnflag': warnflag, 'converged': converged}
        if retall:
            retvals.update({'allvecs': allvecs})
    else:
        xopt = retvals
        retvals = None

    return xopt, retvals


def _fit_lbfgs(f, score, start_params, fargs, kwargs, disp=True, maxiter=100,
               callback=None, retall=False, full_output=True, hess=None):
    """
    Fit model using L-BFGS algorithm

    Parameters
    ----------
    f : function
        Returns negative log likelihood given parameters.
    score : function
        Returns gradient of negative log likelihood with respect to params.

    Notes
    -----
    Within the mle part of statsmodels, the log likelihood function and
    its gradient with respect to the parameters do not have notationally
    consistent sign.
    """

    # Use unconstrained optimization by default.
    bounds = kwargs.setdefault('bounds', [(None, None)] * len(start_params))
    kwargs.setdefault('iprint', 0)

    # Pass the following keyword argument names through to fmin_l_bfgs_b
    # if they are present in kwargs, otherwise use the fmin_l_bfgs_b
    # default values.
    names = ('m', 'pgtol', 'factr', 'maxfun', 'epsilon', 'approx_grad')
    extra_kwargs = dict((x, kwargs[x]) for x in names if x in kwargs)

    # Extract values for the options related to the gradient.
    approx_grad = kwargs.get('approx_grad', False)
    loglike_and_score = kwargs.get('loglike_and_score', None)
    epsilon = kwargs.get('epsilon', None)

    # The approx_grad flag has superpowers nullifying the score function arg.
    if approx_grad:
        score = None

    # Choose among three options for dealing with the gradient (the gradient
    # of a log likelihood function with respect to its parameters
    # is more specifically called the score in statistics terminology).
    # The first option is to use the finite-differences
    # approximation that is built into the fmin_l_bfgs_b optimizer.
    # The second option is to use the provided score function.
    # The third option is to use the score component of a provided
    # function that simultaneously evaluates the log likelihood and score.
    if epsilon and not approx_grad:
        raise ValueError('a finite-differences epsilon was provided '
                         'even though we are not using approx_grad')
    if approx_grad and loglike_and_score:
        raise ValueError('gradient approximation was requested '
                         'even though an analytic loglike_and_score function '
                         'was given')
    if loglike_and_score:
        func = lambda p, *a: tuple(-x for x in loglike_and_score(p, *a))
    elif score:
        func = f
        extra_kwargs['fprime'] = score
    elif approx_grad:
        func = f

    retvals = optimize.fmin_l_bfgs_b(func, start_params, maxiter=maxiter,
                                     callback=callback, args=fargs,
                                     bounds=bounds, disp=disp,
                                     **extra_kwargs)

    if full_output:
        xopt, fopt, d = retvals
        # The warnflag is
        # 0 if converged
        # 1 if too many function evaluations or too many iterations
        # 2 if stopped for another reason, given in d['task']
        warnflag = d['warnflag']
        converged = (warnflag == 0)
        gopt = d['grad']
        fcalls = d['funcalls']
        iterations = d['nit']
        retvals = {'fopt': fopt, 'gopt': gopt, 'fcalls': fcalls,
                   'warnflag': warnflag, 'converged': converged,
                   'iterations': iterations}
    else:
        xopt = retvals[0]
        retvals = None

    return xopt, retvals
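
# Hedged illustrations of the three gradient modes handled above (``f``,
# ``score``, ``x0`` and ``fs`` are hypothetical user callables/arrays):
#
#     _fit_lbfgs(f, score, x0, (), {})                        # analytic score
#     _fit_lbfgs(f, None, x0, (), {'approx_grad': True,
#                                  'epsilon': 1e-8})           # finite differences
#     _fit_lbfgs(f, None, x0, (), {'loglike_and_score': fs})   # joint callable
#
# where ``fs(p, *args)`` is assumed to return ``(loglike, score)``; both
# components are negated internally to match the sign convention of ``f``.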


def _fit_nm(f, score, start_params, fargs, kwargs, disp=True,
            maxiter=100, callback=None, retall=False,
            full_output=True, hess=None):
    xtol = kwargs.setdefault('xtol', 0.0001)
    ftol = kwargs.setdefault('ftol', 0.0001)
    maxfun = kwargs.setdefault('maxfun', None)
    retvals = optimize.fmin(f, start_params, args=fargs, xtol=xtol,
                            ftol=ftol, maxiter=maxiter, maxfun=maxfun,
                            full_output=full_output, disp=disp, retall=retall,
                            callback=callback)
    if full_output:
        if not retall:
            xopt, fopt, niter, fcalls, warnflag = retvals
        else:
            xopt, fopt, niter, fcalls, warnflag, allvecs = retvals
        converged = not warnflag
        retvals = {'fopt': fopt, 'iterations': niter,
                   'fcalls': fcalls, 'warnflag': warnflag,
                   'converged': converged}
        if retall:
            retvals.update({'allvecs': allvecs})
    else:
        xopt = retvals
        retvals = None

    return xopt, retvals


def _fit_cg(f, score, start_params, fargs, kwargs, disp=True,
            maxiter=100, callback=None, retall=False,
            full_output=True, hess=None):
    gtol = kwargs.setdefault('gtol', 1.0000000000000001e-05)
    norm = kwargs.setdefault('norm', np.Inf)
    epsilon = kwargs.setdefault('epsilon', 1.4901161193847656e-08)
    retvals = optimize.fmin_cg(f, start_params, score, gtol=gtol, norm=norm,
                               epsilon=epsilon, maxiter=maxiter,
                               full_output=full_output, disp=disp,
                               retall=retall, callback=callback)
    if full_output:
        if not retall:
            xopt, fopt, fcalls, gcalls, warnflag = retvals
        else:
            xopt, fopt, fcalls, gcalls, warnflag, allvecs = retvals
        converged = not warnflag
        retvals = {'fopt': fopt, 'fcalls': fcalls, 'gcalls': gcalls,
                   'warnflag': warnflag, 'converged': converged}
        if retall:
            retvals.update({'allvecs': allvecs})
    else:
        xopt = retvals
        retvals = None

    return xopt, retvals


def _fit_ncg(f, score, start_params, fargs, kwargs, disp=True,
             maxiter=100, callback=None, retall=False,
             full_output=True, hess=None):
    fhess_p = kwargs.setdefault('fhess_p', None)
    avextol = kwargs.setdefault('avextol', 1.0000000000000001e-05)
    epsilon = kwargs.setdefault('epsilon', 1.4901161193847656e-08)
    retvals = optimize.fmin_ncg(f, start_params, score, fhess_p=fhess_p,
                                fhess=hess, args=fargs, avextol=avextol,
                                epsilon=epsilon, maxiter=maxiter,
                                full_output=full_output, disp=disp,
                                retall=retall, callback=callback)
    if full_output:
        if not retall:
            xopt, fopt, fcalls, gcalls, hcalls, warnflag = retvals
        else:
            (xopt, fopt, fcalls, gcalls, hcalls,
             warnflag, allvecs) = retvals
        converged = not warnflag
        retvals = {'fopt': fopt, 'fcalls': fcalls, 'gcalls': gcalls,
                   'hcalls': hcalls, 'warnflag': warnflag,
                   'converged': converged}
        if retall:
            retvals.update({'allvecs': allvecs})
    else:
        xopt = retvals
        retvals = None

    return xopt, retvals


def _fit_powell(f, score, start_params, fargs, kwargs, disp=True,
                maxiter=100, callback=None, retall=False,
                full_output=True, hess=None):
    xtol = kwargs.setdefault('xtol', 0.0001)
    ftol = kwargs.setdefault('ftol', 0.0001)
    maxfun = kwargs.setdefault('maxfun', None)
    start_direc = kwargs.setdefault('start_direc', None)
    retvals = optimize.fmin_powell(f, start_params, args=fargs, xtol=xtol,
                                   ftol=ftol, maxiter=maxiter, maxfun=maxfun,
                                   full_output=full_output, disp=disp,
                                   retall=retall, callback=callback,
                                   direc=start_direc)
    if full_output:
        if not retall:
            xopt, fopt, direc, niter, fcalls, warnflag = retvals
        else:
            (xopt, fopt, direc, niter, fcalls,
             warnflag, allvecs) = retvals
        converged = not warnflag
        retvals = {'fopt': fopt, 'direc': direc, 'iterations': niter,
                   'fcalls': fcalls, 'warnflag': warnflag,
                   'converged': converged}
        if retall:
            retvals.update({'allvecs': allvecs})
    else:
        xopt = retvals
        retvals = None

    return xopt, retvals


def _fit_basinhopping(f, score, start_params, fargs, kwargs, disp=True,
                      maxiter=100, callback=None, retall=False,
                      full_output=True, hess=None):
    from copy import copy
    kwargs = copy(kwargs)
    niter = kwargs.setdefault('niter', 100)
    niter_success = kwargs.setdefault('niter_success', None)
    T = kwargs.setdefault('T', 1.0)
    stepsize = kwargs.setdefault('stepsize', 0.5)
    interval = kwargs.setdefault('interval', 50)
    minimizer_kwargs = kwargs.get('minimizer', {})
    minimizer_kwargs['args'] = fargs
    minimizer_kwargs['jac'] = score
    method = minimizer_kwargs.get('method', None)
    if method and method != 'L-BFGS-B':  # l_bfgs_b does not take a hessian
        minimizer_kwargs['hess'] = hess

    retvals = optimize.basinhopping(f, start_params,
                                    minimizer_kwargs=minimizer_kwargs,
                                    niter=niter, niter_success=niter_success,
                                    T=T, stepsize=stepsize, disp=disp,
                                    callback=callback, interval=interval)
    if full_output:
        xopt, fopt, niter, fcalls = map(lambda x: getattr(retvals, x),
                                        ['x', 'fun', 'nit', 'nfev'])
        converged = 'completed successfully' in retvals.message[0]
        retvals = {'fopt': fopt, 'iterations': niter,
                   'fcalls': fcalls, 'converged': converged}
    else:
        xopt = retvals.x
        retvals = None

    return xopt, retvals
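
# A hedged sketch of driving the wrapper above directly (``f``, ``score`` and
# ``x0`` are hypothetical user callables/arrays): nested local-minimizer
# options travel through ``kwargs['minimizer']``, e.g.
#
#     xopt, retvals = _fit_basinhopping(
#         f, score, x0, (), {'niter': 50,
#                            'minimizer': {'method': 'L-BFGS-B'}})
#
# Because the local method is 'L-BFGS-B', no Hessian is attached to
# ``minimizer_kwargs`` before scipy.optimize.basinhopping is called.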