1""" 

2Functions 

3--------- 

4.. autosummary:: 

5 :toctree: generated/ 

6 

7 line_search_armijo 

8 line_search_wolfe1 

9 line_search_wolfe2 

10 scalar_search_wolfe1 

11 scalar_search_wolfe2 

12 

13""" 

14from warnings import warn 

15 

16from scipy.optimize import minpack2 

17import numpy as np 

18 

19__all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2', 

20 'scalar_search_wolfe1', 'scalar_search_wolfe2', 

21 'line_search_armijo'] 

22 

class LineSearchWarning(RuntimeWarning):
    pass


#------------------------------------------------------------------------------
# Minpack's Wolfe line and scalar searches
#------------------------------------------------------------------------------

def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
                       old_fval=None, old_old_fval=None,
                       args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
                       xtol=1e-14):
    """
    As `scalar_search_wolfe1`, but do a line search along the direction `pk`.

    Parameters
    ----------
    f : callable
        Function `f(x)`.
    fprime : callable
        Gradient of `f`.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like, optional
        Gradient of `f` at point `xk`.
    old_fval : float, optional
        Value of `f` at point `xk`.
    old_old_fval : float, optional
        Value of `f` at the point preceding `xk`.

    The rest of the parameters are the same as for `scalar_search_wolfe1`.

    Returns
    -------
    stp : float or None
        Step size, or None if the line search did not converge.
    f_count : int
        Number of function evaluations made.
    g_count : int
        Number of gradient evaluations made.
    fval : float
        Value of `f` at the new point.
    old_fval : float
        Value of `f` at point `xk`.
    gval : array
        Gradient of `f` at the final point.
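
    Examples
    --------
    A minimal usage sketch on a quadratic objective, assuming this module is
    importable as ``scipy.optimize.linesearch``; the objective, gradient and
    starting point below are illustrative only:

    >>> import numpy as np
    >>> from scipy.optimize.linesearch import line_search_wolfe1
    >>> def f(x):
    ...     return np.dot(x, x)
    >>> def fprime(x):
    ...     return 2 * x
    >>> xk = np.array([1.8, 1.7])
    >>> pk = -fprime(xk)  # steepest-descent direction
    >>> stp, fc, gc, fval, old_fval, gval = line_search_wolfe1(f, fprime,
    ...                                                        xk, pk)
    >>> bool(stp is not None and fval < f(xk))
    True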

65 """ 

66 if gfk is None: 

67 gfk = fprime(xk) 

68 

69 if isinstance(fprime, tuple): 

70 eps = fprime[1] 

71 fprime = fprime[0] 

72 newargs = (f, eps) + args 

73 gradient = False 

74 else: 

75 newargs = args 

76 gradient = True 

77 

78 gval = [gfk] 

79 gc = [0] 

80 fc = [0] 

81 

82 def phi(s): 

83 fc[0] += 1 

84 return f(xk + s*pk, *args) 

85 

86 def derphi(s): 

87 gval[0] = fprime(xk + s*pk, *newargs) 

88 if gradient: 

89 gc[0] += 1 

90 else: 

91 fc[0] += len(xk) + 1 

92 return np.dot(gval[0], pk) 

93 

94 derphi0 = np.dot(gfk, pk) 

95 

96 stp, fval, old_fval = scalar_search_wolfe1( 

97 phi, derphi, old_fval, old_old_fval, derphi0, 

98 c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol) 

99 

100 return stp, fc[0], gc[0], fval, old_fval, gval[0] 

101 

102 

def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9,
                         amax=50, amin=1e-8, xtol=1e-14):
    """
    Scalar function search for alpha that satisfies strong Wolfe conditions.

    The search direction is assumed to be a descent direction
    (``derphi(0) < 0``), so only alpha > 0 is considered.

    Parameters
    ----------
    phi : callable phi(alpha)
        Function at point `alpha`.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at the previous point.
    derphi0 : float, optional
        Value of derphi at 0.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax, amin : float, optional
        Maximum and minimum step sizes.
    xtol : float, optional
        Relative tolerance for an acceptable step.

    Returns
    -------
    alpha : float
        Step size, or None if no suitable step was found.
    phi : float
        Value of `phi` at the new point `alpha`.
    phi0 : float
        Value of `phi` at `alpha=0`.

    Notes
    -----
    Uses the DCSRCH routine from MINPACK-2.
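
    Examples
    --------
    A minimal usage sketch on the scalar quadratic ``phi(s) = (s - 1)**2``
    (the functions below are illustrative only):

    >>> from scipy.optimize.linesearch import scalar_search_wolfe1
    >>> phi = lambda s: (s - 1)**2
    >>> derphi = lambda s: 2*(s - 1)
    >>> alpha, phi1, phi0 = scalar_search_wolfe1(phi, derphi)
    >>> alpha is not None and phi1 < phi0
    True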

145 """ 

146 

147 if phi0 is None: 

148 phi0 = phi(0.) 

149 if derphi0 is None: 

150 derphi0 = derphi(0.) 

151 

152 if old_phi0 is not None and derphi0 != 0: 

153 alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0) 

154 if alpha1 < 0: 

155 alpha1 = 1.0 

156 else: 

157 alpha1 = 1.0 

158 

159 phi1 = phi0 

160 derphi1 = derphi0 

161 isave = np.zeros((2,), np.intc) 

162 dsave = np.zeros((13,), float) 

163 task = b'START' 

164 

165 maxiter = 100 

166 for i in range(maxiter): 

167 stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1, 

168 c1, c2, xtol, task, 

169 amin, amax, isave, dsave) 

170 if task[:2] == b'FG': 

171 alpha1 = stp 

172 phi1 = phi(stp) 

173 derphi1 = derphi(stp) 

174 else: 

175 break 

176 else: 

177 # maxiter reached, the line search did not converge 

178 stp = None 

179 

180 if task[:5] == b'ERROR' or task[:4] == b'WARN': 

181 stp = None # failed 

182 

183 return stp, phi1, phi0 

184 

185 

186line_search = line_search_wolfe1 

187 

#------------------------------------------------------------------------------
# Pure-Python Wolfe line and scalar searches
#------------------------------------------------------------------------------

def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
                       old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
                       extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    Parameters
    ----------
    f : callable f(x,*args)
        Objective function.
    myfprime : callable f'(x,*args)
        Objective function gradient.
    xk : ndarray
        Starting point.
    pk : ndarray
        Search direction.
    gfk : ndarray, optional
        Gradient value for x=xk (xk being the current parameter
        estimate). Will be recomputed if omitted.
    old_fval : float, optional
        Function value for x=xk. Will be recomputed if omitted.
    old_old_fval : float, optional
        Function value for the point preceding x=xk.
    args : tuple, optional
        Additional arguments passed to the objective function.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, x, f, g)``
        returning a boolean. Arguments are the proposed step ``alpha``
        and the corresponding ``x``, ``f`` and ``g`` values. The line search
        accepts the value of ``alpha`` only if this
        callable returns ``True``. If the callable returns ``False``
        for the step length, the algorithm will continue with
        new iterates. The callable is only called for iterates
        satisfying the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha : float or None
        Alpha for which ``x_new = x0 + alpha * pk``,
        or None if the line search algorithm did not converge.
    fc : int
        Number of function evaluations made.
    gc : int
        Number of gradient evaluations made.
    new_fval : float or None
        New function value ``f(x_new)=f(x0+alpha*pk)``,
        or None if the line search algorithm did not converge.
    old_fval : float
        Old function value ``f(x0)``.
    new_slope : float or None
        The local slope along the search direction at the
        new value ``<myfprime(x_new), pk>``,
        or None if the line search algorithm did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.optimize import line_search

    An objective function and its gradient are defined.

    >>> def obj_func(x):
    ...     return (x[0])**2 + (x[1])**2
    >>> def obj_grad(x):
    ...     return [2*x[0], 2*x[1]]

    We can find alpha that satisfies the strong Wolfe conditions.

    >>> start_point = np.array([1.8, 1.7])
    >>> search_gradient = np.array([-1.0, -1.0])
    >>> line_search(obj_func, obj_grad, start_point, search_gradient)
    (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])

    """
    fc = [0]
    gc = [0]
    gval = [None]
    gval_alpha = [None]

    def phi(alpha):
        fc[0] += 1
        return f(xk + alpha * pk, *args)

    if isinstance(myfprime, tuple):
        def derphi(alpha):
            fc[0] += len(xk) + 1
            eps = myfprime[1]
            fprime = myfprime[0]
            newargs = (f, eps) + args
            gval[0] = fprime(xk + alpha * pk, *newargs)  # store for later use
            gval_alpha[0] = alpha
            return np.dot(gval[0], pk)
    else:
        fprime = myfprime

        def derphi(alpha):
            gc[0] += 1
            gval[0] = fprime(xk + alpha * pk, *args)  # store for later use
            gval_alpha[0] = alpha
            return np.dot(gval[0], pk)

    if gfk is None:
        gfk = fprime(xk, *args)
    derphi0 = np.dot(gfk, pk)

    if extra_condition is not None:
        # Add the current gradient as an argument, to avoid needless
        # re-evaluation.
        def extra_condition2(alpha, phi):
            if gval_alpha[0] != alpha:
                derphi(alpha)
            x = xk + alpha * pk
            return extra_condition(alpha, x, phi, gval[0])
    else:
        extra_condition2 = None

    alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
            phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
            extra_condition2, maxiter=maxiter)

    if derphi_star is None:
        warn('The line search algorithm did not converge', LineSearchWarning)
    else:
        # derphi_star is a number (derphi), so use the most recently
        # calculated gradient used in computing it (derphi = gfk*pk).
        # This is the gradient at the next step, so there is no need to
        # compute it again in the outer loop.
        derphi_star = gval[0]

    return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star


def scalar_search_wolfe2(phi, derphi, phi0=None,
                         old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9, amax=None,
                         extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    The search direction is assumed to be a descent direction
    (``derphi(0) < 0``), so only alpha > 0 is considered.

    Parameters
    ----------
    phi : callable phi(alpha)
        Objective scalar function.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at the previous point.
    derphi0 : float, optional
        Value of derphi at 0.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, phi_value)``
        returning a boolean. The line search accepts the value
        of ``alpha`` only if this callable returns ``True``.
        If the callable returns ``False`` for the step length,
        the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying
        the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha_star : float or None
        Best alpha, or None if the line search algorithm did not converge.
    phi_star : float
        phi at alpha_star.
    phi0 : float
        phi at 0.
    derphi_star : float or None
        derphi at alpha_star, or None if the line search algorithm
        did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.
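
    Examples
    --------
    A minimal usage sketch on ``phi(s) = (s - 1)**2`` (illustrative only):

    >>> from scipy.optimize.linesearch import scalar_search_wolfe2
    >>> phi = lambda s: (s - 1)**2
    >>> derphi = lambda s: 2*(s - 1)
    >>> alpha, phi_star, phi0, derphi_star = scalar_search_wolfe2(phi, derphi)
    >>> alpha is not None and phi_star < phi0
    True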

393 """ 

394 

395 if phi0 is None: 

396 phi0 = phi(0.) 

397 

398 if derphi0 is None: 

399 derphi0 = derphi(0.) 

400 

401 alpha0 = 0 

402 if old_phi0 is not None and derphi0 != 0: 

403 alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0) 

404 else: 

405 alpha1 = 1.0 

406 

407 if alpha1 < 0: 

408 alpha1 = 1.0 

409 

410 if amax is not None: 

411 alpha1 = min(alpha1, amax) 

412 

413 phi_a1 = phi(alpha1) 

414 #derphi_a1 = derphi(alpha1) evaluated below 

415 

416 phi_a0 = phi0 

417 derphi_a0 = derphi0 

418 

419 if extra_condition is None: 

420 extra_condition = lambda alpha, phi: True 

421 

422 for i in range(maxiter): 

423 if alpha1 == 0 or (amax is not None and alpha0 == amax): 

424 # alpha1 == 0: This shouldn't happen. Perhaps the increment has 

425 # slipped below machine precision? 

426 alpha_star = None 

427 phi_star = phi0 

428 phi0 = old_phi0 

429 derphi_star = None 

430 

431 if alpha1 == 0: 

432 msg = 'Rounding errors prevent the line search from converging' 

433 else: 

434 msg = "The line search algorithm could not find a solution " + \ 

435 "less than or equal to amax: %s" % amax 

436 

437 warn(msg, LineSearchWarning) 

438 break 

439 

440 if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \ 

441 ((phi_a1 >= phi_a0) and (i > 1)): 

442 alpha_star, phi_star, derphi_star = \ 

443 _zoom(alpha0, alpha1, phi_a0, 

444 phi_a1, derphi_a0, phi, derphi, 

445 phi0, derphi0, c1, c2, extra_condition) 

446 break 

447 

448 derphi_a1 = derphi(alpha1) 

449 if (abs(derphi_a1) <= -c2*derphi0): 

450 if extra_condition(alpha1, phi_a1): 

451 alpha_star = alpha1 

452 phi_star = phi_a1 

453 derphi_star = derphi_a1 

454 break 

455 

456 if (derphi_a1 >= 0): 

457 alpha_star, phi_star, derphi_star = \ 

458 _zoom(alpha1, alpha0, phi_a1, 

459 phi_a0, derphi_a1, phi, derphi, 

460 phi0, derphi0, c1, c2, extra_condition) 

461 break 

462 

463 alpha2 = 2 * alpha1 # increase by factor of two on each iteration 

464 if amax is not None: 

465 alpha2 = min(alpha2, amax) 

466 alpha0 = alpha1 

467 alpha1 = alpha2 

468 phi_a0 = phi_a1 

469 phi_a1 = phi(alpha1) 

470 derphi_a0 = derphi_a1 

471 

472 else: 

473 # stopping test maxiter reached 

474 alpha_star = alpha1 

475 phi_star = phi_a1 

476 derphi_star = None 

477 warn('The line search algorithm did not converge', LineSearchWarning) 

478 

479 return alpha_star, phi_star, phi0, derphi_star 

480 

481 

def _cubicmin(a, fa, fpa, b, fb, c, fc):
    """
    Finds the minimizer for a cubic polynomial that goes through the
    points (a, fa), (b, fb), and (c, fc) with derivative at a of fpa.

    If no minimizer can be found, return None.
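
    Examples
    --------
    A small sanity check (illustrative): the cubic through ``(0, 0)``,
    ``(2, 2)`` and ``(3, 18)`` with slope ``-3`` at ``0`` is ``x**3 - 3*x``,
    whose local minimizer is ``x = 1``:

    >>> from scipy.optimize.linesearch import _cubicmin
    >>> float(_cubicmin(0.0, 0.0, -3.0, 2.0, 2.0, 3.0, 18.0))
    1.0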

489 """ 

490 # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D 

491 

492 with np.errstate(divide='raise', over='raise', invalid='raise'): 

493 try: 

494 C = fpa 

495 db = b - a 

496 dc = c - a 

497 denom = (db * dc) ** 2 * (db - dc) 

498 d1 = np.empty((2, 2)) 

499 d1[0, 0] = dc ** 2 

500 d1[0, 1] = -db ** 2 

501 d1[1, 0] = -dc ** 3 

502 d1[1, 1] = db ** 3 

503 [A, B] = np.dot(d1, np.asarray([fb - fa - C * db, 

504 fc - fa - C * dc]).flatten()) 

505 A /= denom 

506 B /= denom 

507 radical = B * B - 3 * A * C 

508 xmin = a + (-B + np.sqrt(radical)) / (3 * A) 

509 except ArithmeticError: 

510 return None 

511 if not np.isfinite(xmin): 

512 return None 

513 return xmin 

514 

515 

def _quadmin(a, fa, fpa, b, fb):
    """
    Finds the minimizer for a quadratic polynomial that goes through
    the points (a, fa), (b, fb) with derivative at a of fpa.

    If no minimizer can be found, return None.
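
    Examples
    --------
    A small sanity check (illustrative): the quadratic through ``(0, 1)`` and
    ``(2, 1)`` with slope ``-2`` at ``0`` is ``(x - 1)**2``, minimized at
    ``x = 1``:

    >>> from scipy.optimize.linesearch import _quadmin
    >>> float(_quadmin(0.0, 1.0, -2.0, 2.0, 1.0))
    1.0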

521 """ 

522 # f(x) = B*(x-a)^2 + C*(x-a) + D 

523 with np.errstate(divide='raise', over='raise', invalid='raise'): 

524 try: 

525 D = fa 

526 C = fpa 

527 db = b - a * 1.0 

528 B = (fb - D - C * db) / (db * db) 

529 xmin = a - C / (2.0 * B) 

530 except ArithmeticError: 

531 return None 

532 if not np.isfinite(xmin): 

533 return None 

534 return xmin 

535 

536 

def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2, extra_condition):
    """Zoom stage of approximate line search satisfying strong Wolfe conditions.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    Notes
    -----
    Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
    'Numerical Optimization', 1999, p. 61.

    """

    maxiter = 10
    i = 0
    delta1 = 0.2  # cubic interpolant check
    delta2 = 0.1  # quadratic interpolant check
    phi_rec = phi0
    a_rec = 0
    while True:
        # Interpolate to find a trial step length between a_lo and a_hi.
        # The interpolation method is chosen as follows: use cubic
        # interpolation; if the result is within delta * dalpha of the end
        # points, or outside of the interval bounded by a_lo and a_hi, then
        # use quadratic interpolation; if the result is still too close,
        # then use bisection.

        dalpha = a_hi - a_lo
        if dalpha < 0:
            a, b = a_hi, a_lo
        else:
            a, b = a_lo, a_hi

        # minimizer of cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
        #
        # if the result is too close to the end points (or out of the
        # interval), then use quadratic interpolation with phi_lo,
        # derphi_lo and phi_hi; if the result is still too close to the
        # end points (or out of the interval), then use bisection

        if (i > 0):
            cchk = delta1 * dalpha
            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
                            a_rec, phi_rec)
        if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
            qchk = delta2 * dalpha
            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
            if (a_j is None) or (a_j > b - qchk) or (a_j < a + qchk):
                a_j = a_lo + 0.5*dalpha

        # Check new value of a_j

        phi_aj = phi(a_j)
        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
            phi_rec = phi_hi
            a_rec = a_hi
            a_hi = a_j
            phi_hi = phi_aj
        else:
            derphi_aj = derphi(a_j)
            if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
                a_star = a_j
                val_star = phi_aj
                valprime_star = derphi_aj
                break
            if derphi_aj*(a_hi - a_lo) >= 0:
                phi_rec = phi_hi
                a_rec = a_hi
                a_hi = a_lo
                phi_hi = phi_lo
            else:
                phi_rec = phi_lo
                a_rec = a_lo
            a_lo = a_j
            phi_lo = phi_aj
            derphi_lo = derphi_aj
        i += 1
        if (i > maxiter):
            # Failed to find a conforming step size
            a_star = None
            val_star = None
            valprime_star = None
            break
    return a_star, val_star, valprime_star


#------------------------------------------------------------------------------
# Armijo line and scalar searches
#------------------------------------------------------------------------------

def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize over alpha, the function ``f(xk + alpha*pk)``.

    Parameters
    ----------
    f : callable
        Function to be minimized.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like
        Gradient of `f` at point `xk`.
    old_fval : float
        Value of `f` at point `xk`.
    args : tuple, optional
        Optional arguments.
    c1 : float, optional
        Value to control the stopping criterion.
    alpha0 : scalar, optional
        Value of `alpha` at the start of the optimization.

    Returns
    -------
    alpha : float or None
        Step size, or None if no suitable step was found.
    f_count : int
        Number of function evaluations made.
    f_val_at_alpha : float
        Function value at the new point.

    Notes
    -----
    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.
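
    Examples
    --------
    A minimal usage sketch on a quadratic objective (illustrative only):

    >>> import numpy as np
    >>> from scipy.optimize.linesearch import line_search_armijo
    >>> def f(x):
    ...     return np.dot(x, x)
    >>> xk = np.array([1.8, 1.7])
    >>> gfk = 2 * xk
    >>> pk = -gfk  # steepest-descent direction
    >>> alpha, fc, fval = line_search_armijo(f, xk, pk, gfk, f(xk))
    >>> bool(alpha is not None and fval < f(xk))
    True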

661 """ 

662 xk = np.atleast_1d(xk) 

663 fc = [0] 

664 

665 def phi(alpha1): 

666 fc[0] += 1 

667 return f(xk + alpha1*pk, *args) 

668 

669 if old_fval is None: 

670 phi0 = phi(0.) 

671 else: 

672 phi0 = old_fval # compute f(xk) -- done in past loop 

673 

674 derphi0 = np.dot(gfk, pk) 

675 alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1, 

676 alpha0=alpha0) 

677 return alpha, fc[0], phi1 

678 

679 

680def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1): 

681 """ 

682 Compatibility wrapper for `line_search_armijo` 

683 """ 

684 r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1, 

685 alpha0=alpha0) 

686 return r[0], r[1], 0, r[2] 

687 

688 

def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
    """Minimize over alpha, the function ``phi(alpha)``.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.

    The search direction is assumed to be a descent direction
    (``derphi0 < 0``), so only alpha > 0 is considered.

    Returns
    -------
    alpha : float or None
        Step size, or None if no suitable step was found.
    phi1 : float
        Value of `phi` at the new point.
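
    Examples
    --------
    A minimal usage sketch on ``phi(s) = (s - 1)**2`` (illustrative only):

    >>> from scipy.optimize.linesearch import scalar_search_armijo
    >>> phi = lambda s: (s - 1)**2
    >>> alpha, phi1 = scalar_search_armijo(phi, phi0=phi(0.), derphi0=-2.0)
    >>> alpha is not None and phi1 < phi(0.)
    True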

702 """ 

703 phi_a0 = phi(alpha0) 

704 if phi_a0 <= phi0 + c1*alpha0*derphi0: 

705 return alpha0, phi_a0 

706 

707 # Otherwise, compute the minimizer of a quadratic interpolant: 

708 

709 alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0) 

710 phi_a1 = phi(alpha1) 

711 

712 if (phi_a1 <= phi0 + c1*alpha1*derphi0): 

713 return alpha1, phi_a1 

714 

715 # Otherwise, loop with cubic interpolation until we find an alpha which 

716 # satisfies the first Wolfe condition (since we are backtracking, we will 

717 # assume that the value of alpha is not too small and satisfies the second 

718 # condition. 

719 

720 while alpha1 > amin: # we are assuming alpha>0 is a descent direction 

721 factor = alpha0**2 * alpha1**2 * (alpha1-alpha0) 

722 a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \ 

723 alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0) 

724 a = a / factor 

725 b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \ 

726 alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0) 

727 b = b / factor 

728 

729 alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a) 

730 phi_a2 = phi(alpha2) 

731 

732 if (phi_a2 <= phi0 + c1*alpha2*derphi0): 

733 return alpha2, phi_a2 

734 

735 if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96: 

736 alpha2 = alpha1 / 2.0 

737 

738 alpha0 = alpha1 

739 alpha1 = alpha2 

740 phi_a0 = phi_a1 

741 phi_a1 = phi_a2 

742 

743 # Failed to find a suitable step length 

744 return None, phi_a1 

745 

746 

#------------------------------------------------------------------------------
# Non-monotone line search for DF-SANE
#------------------------------------------------------------------------------

def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
                                  gamma=1e-4, tau_min=0.1, tau_max=0.5):
    """
    Nonmonotone backtracking line search as described in [1]_

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    prev_fs : sequence of float
        List of previous merit function values. Should have ``len(prev_fs) <= M``
        where ``M`` is the nonmonotonicity window parameter.
    eta : float
        Allowed merit function increase, see [1]_
    gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position

    References
    ----------
    .. [1] "Spectral residual method without gradient information for solving
           large-scale nonlinear systems of equations." W. La Cruz,
           J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).
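
    Examples
    --------
    A minimal usage sketch for the residual ``F(x) = x`` with merit function
    ``||F(x)||**2``; the window of previous merit values holds a single entry
    (illustrative only):

    >>> import numpy as np
    >>> from scipy.optimize.linesearch import _nonmonotone_line_search_cruz
    >>> def f(x):
    ...     F = x  # residual
    ...     return np.dot(F, F), F
    >>> x_k = np.array([1.0, 1.0])
    >>> d = -x_k  # step towards the root at the origin
    >>> prev_fs = [f(x_k)[0]]
    >>> alpha, xp, fp, Fp = _nonmonotone_line_search_cruz(f, x_k, d,
    ...                                                   prev_fs, eta=0.1)
    >>> bool(fp < prev_fs[-1])
    True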

790 """ 

791 f_k = prev_fs[-1] 

792 f_bar = max(prev_fs) 

793 

794 alpha_p = 1 

795 alpha_m = 1 

796 alpha = 1 

797 

798 while True: 

799 xp = x_k + alpha_p * d 

800 fp, Fp = f(xp) 

801 

802 if fp <= f_bar + eta - gamma * alpha_p**2 * f_k: 

803 alpha = alpha_p 

804 break 

805 

806 alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k) 

807 

808 xp = x_k - alpha_m * d 

809 fp, Fp = f(xp) 

810 

811 if fp <= f_bar + eta - gamma * alpha_m**2 * f_k: 

812 alpha = -alpha_m 

813 break 

814 

815 alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k) 

816 

817 alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p) 

818 alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m) 

819 

820 return alpha, xp, fp, Fp 

821 

822 

def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
                                   gamma=1e-4, tau_min=0.1, tau_max=0.5,
                                   nu=0.85):
    """
    Nonmonotone line search from [1]_

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    f_k : float
        Initial merit function value.
    C, Q : float
        Control parameters. On the first iteration, give the values
        ``Q=1.0, C=f_k``.
    eta : float
        Allowed merit function increase, see [1]_
    nu, gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position
    C : float
        New value for the control parameter C
    Q : float
        New value for the control parameter Q

    References
    ----------
    .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
           search and its application to the spectral residual
           method'', IMA J. Numer. Anal. 29, 814 (2009).
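
    Examples
    --------
    A minimal usage sketch mirroring the `_nonmonotone_line_search_cruz`
    example, with the first-iteration values ``Q=1.0`` and ``C=f_k``
    (illustrative only):

    >>> import numpy as np
    >>> from scipy.optimize.linesearch import _nonmonotone_line_search_cheng
    >>> def f(x):
    ...     F = x  # residual
    ...     return np.dot(F, F), F
    >>> x_k = np.array([1.0, 1.0])
    >>> d = -x_k
    >>> f_k = f(x_k)[0]
    >>> alpha, xp, fp, Fp, C, Q = _nonmonotone_line_search_cheng(
    ...     f, x_k, d, f_k, C=f_k, Q=1.0, eta=0.1)
    >>> bool(fp < f_k)
    True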

869 """ 

870 alpha_p = 1 

871 alpha_m = 1 

872 alpha = 1 

873 

874 while True: 

875 xp = x_k + alpha_p * d 

876 fp, Fp = f(xp) 

877 

878 if fp <= C + eta - gamma * alpha_p**2 * f_k: 

879 alpha = alpha_p 

880 break 

881 

882 alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k) 

883 

884 xp = x_k - alpha_m * d 

885 fp, Fp = f(xp) 

886 

887 if fp <= C + eta - gamma * alpha_m**2 * f_k: 

888 alpha = -alpha_m 

889 break 

890 

891 alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k) 

892 

893 alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p) 

894 alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m) 

895 

896 # Update C and Q 

897 Q_next = nu * Q + 1 

898 C = (nu * Q * (C + eta) + fp) / Q_next 

899 Q = Q_next 

900 

901 return alpha, xp, fp, Fp, C, Q