# -*- coding: utf-8 -*-
"""

Created on Fri Aug 17 13:10:52 2012

Author: Josef Perktold
License: BSD-3
"""

import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import svds
from scipy.optimize import fminbound
import warnings

from statsmodels.tools.tools import Bunch
from statsmodels.tools.sm_exceptions import (
    IterationLimitWarning, iteration_limit_doc)


def clip_evals(x, value=0):
    """Clip the eigenvalues of the symmetric matrix `x` from below at `value`.

    Returns the reconstructed matrix and a flag indicating whether any
    eigenvalue was clipped.
    """
    evals, evecs = np.linalg.eigh(x)
    clipped = np.any(evals < value)
    x_new = np.dot(evecs * np.maximum(evals, value), evecs.T)
    return x_new, clipped


def corr_nearest(corr, threshold=1e-15, n_fact=100):
    '''
    Find the nearest correlation matrix that is positive semi-definite.

    The function iteratively adjusts the correlation matrix by clipping the
    eigenvalues of a difference matrix. The diagonal elements are set to one.

    Parameters
    ----------
    corr : ndarray, (k, k)
        initial correlation matrix
    threshold : float
        clipping threshold for smallest eigenvalue, see Notes
    n_fact : int or float
        factor to determine the maximum number of iterations. The maximum
        number of iterations is the integer part of the number of columns in
        the correlation matrix times n_fact.

    Returns
    -------
    corr_new : ndarray
        corrected correlation matrix

    Notes
    -----
    The smallest eigenvalue of the corrected correlation matrix is
    approximately equal to the ``threshold``.
    If ``threshold=0``, then the smallest eigenvalue of the correlation
    matrix might be negative, but zero within numerical error, for example
    in the range of -1e-16.

    Assumes the input correlation matrix is symmetric.

    Stops after the first step if the correlation matrix is already positive
    semi-definite or positive definite, so that the smallest eigenvalue is
    above the threshold. In this case, the returned array is not the
    original, but is equal to it within numerical precision.

    See Also
    --------
    corr_clipped
    cov_nearest
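
    Examples
    --------
    A small illustrative use with an indefinite input (values chosen
    arbitrarily for this sketch):

    >>> import numpy as np
    >>> from statsmodels.stats.correlation_tools import corr_nearest
    >>> corr = np.array([[1.0, 0.95, 0.1],
    ...                  [0.95, 1.0, 0.95],
    ...                  [0.1, 0.95, 1.0]])  # not positive semi-definite
    >>> corr_psd = corr_nearest(corr, threshold=1e-15)
    >>> bool(np.linalg.eigvalsh(corr_psd).min() >= -1e-8)
    True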

    '''
    k_vars = corr.shape[0]
    if k_vars != corr.shape[1]:
        raise ValueError("matrix is not square")

    diff = np.zeros(corr.shape)
    x_new = corr.copy()
    diag_idx = np.arange(k_vars)

    for ii in range(int(len(corr) * n_fact)):
        x_adj = x_new - diff
        x_psd, clipped = clip_evals(x_adj, value=threshold)
        if not clipped:
            x_new = x_psd
            break
        diff = x_psd - x_adj
        x_new = x_psd.copy()
        x_new[diag_idx, diag_idx] = 1
    else:
        warnings.warn(iteration_limit_doc, IterationLimitWarning)

    return x_new


def corr_clipped(corr, threshold=1e-15):
    '''
    Find a near correlation matrix that is positive semi-definite.

    This function clips the eigenvalues, replacing eigenvalues smaller than
    the threshold by the threshold. The new matrix is normalized, so that
    the diagonal elements are one.
    Compared to corr_nearest, the distance between the original correlation
    matrix and the positive definite correlation matrix is larger. However,
    it is much faster since it only computes the eigenvalues once.

    Parameters
    ----------
    corr : ndarray, (k, k)
        initial correlation matrix
    threshold : float
        clipping threshold for smallest eigenvalue, see Notes

    Returns
    -------
    corr_new : ndarray
        corrected correlation matrix

    Notes
    -----
    The smallest eigenvalue of the corrected correlation matrix is
    approximately equal to the ``threshold``. In examples, the
    smallest eigenvalue can be by a factor of 10 smaller than the threshold,
    e.g. a threshold of 1e-8 can result in a smallest eigenvalue in the
    range between 1e-9 and 1e-8.
    If ``threshold=0``, then the smallest eigenvalue of the correlation
    matrix might be negative, but zero within numerical error, for example
    in the range of -1e-16.

    Assumes the input correlation matrix is symmetric. The diagonal elements
    of the returned correlation matrix are set to ones.

    If the correlation matrix is already positive semi-definite given the
    threshold, then the original correlation matrix is returned.

    ``corr_clipped`` is 40 or more times faster than ``corr_nearest`` in a
    simple example, but has a slightly larger approximation error.

    See Also
    --------
    corr_nearest
    cov_nearest
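
    Examples
    --------
    A quick sketch, reusing the indefinite matrix from the ``corr_nearest``
    example (illustrative values):

    >>> import numpy as np
    >>> from statsmodels.stats.correlation_tools import corr_clipped
    >>> corr = np.array([[1.0, 0.95, 0.1],
    ...                  [0.95, 1.0, 0.95],
    ...                  [0.1, 0.95, 1.0]])
    >>> corr_psd = corr_clipped(corr, threshold=1e-15)
    >>> bool(np.linalg.eigvalsh(corr_psd).min() >= -1e-8)
    True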

    '''
    x_new, clipped = clip_evals(corr, value=threshold)
    if not clipped:
        return corr

    # cov2corr
    x_std = np.sqrt(np.diag(x_new))
    x_new = x_new / x_std / x_std[:, None]
    return x_new


def cov_nearest(cov, method='clipped', threshold=1e-15, n_fact=100,
                return_all=False):
    """
    Find the nearest covariance matrix that is positive (semi-) definite.

    This leaves the diagonal, i.e. the variance, unchanged.

    Parameters
    ----------
    cov : ndarray, (k, k)
        initial covariance matrix
    method : str
        if "clipped", then the faster but less accurate ``corr_clipped`` is
        used. If "nearest", then ``corr_nearest`` is used.
    threshold : float
        clipping threshold for smallest eigenvalue, see Notes
    n_fact : int or float
        factor to determine the maximum number of iterations in
        ``corr_nearest``. See its docstring.
    return_all : bool
        if False (default), then only the covariance matrix is returned.
        If True, then the correlation matrix and standard deviation are
        additionally returned.

    Returns
    -------
    cov_ : ndarray
        corrected covariance matrix
    corr_ : ndarray, (optional)
        corrected correlation matrix
    std_ : ndarray, (optional)
        standard deviation

    Notes
    -----
    This converts the covariance matrix to a correlation matrix. Then, it
    finds the nearest correlation matrix that is positive semidefinite and
    converts it back to a covariance matrix using the initial standard
    deviation.

    The smallest eigenvalue of the intermediate correlation matrix is
    approximately equal to the ``threshold``.
    If ``threshold=0``, then the smallest eigenvalue of the correlation
    matrix might be negative, but zero within numerical error, for example
    in the range of -1e-16.

    Assumes the input covariance matrix is symmetric.

    See Also
    --------
    corr_nearest
    corr_clipped
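
    Examples
    --------
    A minimal sketch (an arbitrary indefinite matrix, scaled so the
    variances differ from one):

    >>> import numpy as np
    >>> from statsmodels.stats.correlation_tools import cov_nearest
    >>> cov = np.array([[2.0, 1.9, 0.2],
    ...                 [1.9, 2.0, 1.9],
    ...                 [0.2, 1.9, 2.0]])  # not positive semi-definite
    >>> cov_psd = cov_nearest(cov, method='nearest')
    >>> bool(np.linalg.eigvalsh(cov_psd).min() >= -1e-8)
    True
    >>> bool(np.allclose(np.diag(cov_psd), np.diag(cov)))
    True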

207 """ 

208 

209 from statsmodels.stats.moment_helpers import cov2corr, corr2cov 

210 cov_, std_ = cov2corr(cov, return_std=True) 

211 if method == 'clipped': 

212 corr_ = corr_clipped(cov_, threshold=threshold) 

213 else: # method == 'nearest' 

214 corr_ = corr_nearest(cov_, threshold=threshold, n_fact=n_fact) 

215 

216 cov_ = corr2cov(corr_, std_) 

217 

218 if return_all: 

219 return cov_, corr_, std_ 

220 else: 

221 return cov_ 

222 

223 

def _nmono_linesearch(obj, grad, x, d, obj_hist, M=10, sig1=0.1,
                      sig2=0.9, gam=1e-4, maxiter=100):
    """
    Implements the non-monotone line search of Grippo et al. (1986),
    as described in Birgin, Martinez and Raydan (2013).

    Parameters
    ----------
    obj : real-valued function
        The objective function, to be minimized
    grad : vector-valued function
        The gradient of the objective function
    x : array_like
        The starting point for the line search
    d : array_like
        The search direction
    obj_hist : array_like
        Objective function history (must contain at least one value)
    M : positive int
        Number of previous function points to consider (see references
        for details).
    sig1 : real
        Tuning parameter, see references for details.
    sig2 : real
        Tuning parameter, see references for details.
    gam : real
        Tuning parameter, see references for details.
    maxiter : int
        The maximum number of iterations; returns Nones if convergence
        does not occur by this point

    Returns
    -------
    alpha : real
        The step value
    x : array_like
        The function argument at the final step
    obval : real
        The function value at the final step
    g : array_like
        The gradient at the final step

    Notes
    -----
    The basic idea is to take a big step in the direction of the
    gradient, even if the function value is not decreased (but there
    is a maximum allowed increase in terms of the recent history of
    the iterates).

    References
    ----------
    Grippo L, Lampariello F, Lucidi S (1986). A Nonmonotone Line
    Search Technique for Newton's Method. SIAM Journal on Numerical
    Analysis, 23, 707-716.

    E. Birgin, J.M. Martinez, and M. Raydan. Spectral projected
    gradient methods: Review and perspectives. Journal of Statistical
    Software (preprint).
    """

    alpha = 1.
    last_obval = obj(x)
    obj_max = max(obj_hist[-M:])

    for _ in range(maxiter):

        obval = obj(x + alpha*d)
        # Evaluate the gradient at the trial point; the caller relies on
        # the returned gradient being taken at the accepted iterate.
        g = grad(x + alpha*d)
        gtd = (g * d).sum()

        if obval <= obj_max + gam*alpha*gtd:
            return alpha, x + alpha*d, obval, g

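        # Candidate step from quadratic interpolation of the objective
        # along d, safeguarded below to lie in [sig1, sig2*alpha].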
        a1 = -0.5*alpha**2*gtd / (obval - last_obval - alpha*gtd)

        if (sig1 <= a1) and (a1 <= sig2*alpha):
            alpha = a1
        else:
            alpha /= 2.

        last_obval = obval

    return None, None, None, None


def _spg_optim(func, grad, start, project, maxiter=1e4, M=10,
               ctol=1e-3, maxiter_nmls=200, lam_min=1e-30,
               lam_max=1e30, sig1=0.1, sig2=0.9, gam=1e-4):
    """
    Implements the spectral projected gradient method for minimizing a
    differentiable function on a convex domain.

    Parameters
    ----------
    func : real-valued function
        The objective function to be minimized.
    grad : real array-valued function
        The gradient of the objective function
    start : array_like
        The starting point
    project : function
        In-place projection of the argument to the domain
        of func.
    ...
        See Notes regarding the additional tuning arguments.

    Returns
    -------
    rslt : Bunch
        rslt.params is the final iterate; other fields describe the
        convergence status.

    Notes
    -----
    This can be an effective heuristic algorithm for problems where no
    guaranteed algorithm for computing a global minimizer is known.

    There are a number of tuning parameters, but these generally
    should not be changed except for `maxiter` (positive integer) and
    `ctol` (small positive real). See the Birgin et al. reference for
    more information about the tuning parameters.

    References
    ----------
    E. Birgin, J.M. Martinez, and M. Raydan. Spectral projected
    gradient methods: Review and perspectives. Journal of Statistical
    Software (preprint). Available at:
    http://www.ime.usp.br/~egbirgin/publications/bmr5.pdf
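
    Examples
    --------
    A minimal sketch minimizing a quadratic over the unit box (all values
    illustrative):

    >>> import numpy as np
    >>> from statsmodels.stats.correlation_tools import _spg_optim
    >>> def func(x):
    ...     return ((x - 2)**2).sum()
    >>> def grad(x):
    ...     return 2*(x - 2)
    >>> def project(x):  # in-place projection onto [0, 1]^3
    ...     np.clip(x, 0, 1, out=x)
    >>> rslt = _spg_optim(func, grad, np.zeros(3), project)
    >>> bool(rslt.Converged), bool(np.allclose(rslt.params, 1))
    (True, True)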

351 """ 

352 

353 lam = min(10*lam_min, lam_max) 

354 

355 params = start.copy() 

356 gval = grad(params) 

357 

358 obj_hist = [func(params), ] 

359 

360 for itr in range(int(maxiter)): 

361 

362 # Check convergence 

363 df = params - gval 

364 project(df) 

365 df -= params 

366 if np.max(np.abs(df)) < ctol: 

367 return Bunch(**{"Converged": True, "params": params, 

368 "objective_values": obj_hist, 

369 "Message": "Converged successfully"}) 

370 

371 # The line search direction 

372 d = params - lam*gval 

373 project(d) 

374 d -= params 

375 

        # Carry out the nonmonotone line search
        alpha, params1, fval, gval1 = _nmono_linesearch(
            func,
            grad,
            params,
            d,
            obj_hist,
            M=M,
            sig1=sig1,
            sig2=sig2,
            gam=gam,
            maxiter=maxiter_nmls)

        if alpha is None:
            return Bunch(**{"Converged": False, "params": params,
                            "objective_values": obj_hist,
                            "Message": "Failed in nmono_linesearch"})

        obj_hist.append(fval)
        s = params1 - params
        y = gval1 - gval

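        # Barzilai-Borwein step length for the next iteration,
        # safeguarded to lie in [lam_min, lam_max].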
        sy = (s*y).sum()
        if sy <= 0:
            lam = lam_max
        else:
            ss = (s*s).sum()
            lam = max(lam_min, min(ss/sy, lam_max))

        params = params1
        gval = gval1

    return Bunch(**{"Converged": False, "params": params,
                    "objective_values": obj_hist,
                    "Message": "spg_optim did not converge"})


def _project_correlation_factors(X):
    """
    Project a matrix into the domain of matrices whose row-wise sums
    of squares are less than or equal to 1.

    The input matrix is modified in-place.
    """
    nm = np.sqrt((X*X).sum(1))
    ii = np.flatnonzero(nm > 1)
    if len(ii) > 0:
        X[ii, :] /= nm[ii][:, None]


class FactoredPSDMatrix:
    """
    Representation of a positive semidefinite matrix in factored form.

    The representation is constructed based on a vector `diag` and
    rectangular matrix `root`, such that the PSD matrix represented by
    the class instance is Diag + root * root', where Diag is the
    square diagonal matrix with `diag` on its main diagonal.

    Parameters
    ----------
    diag : 1d array_like
        See above
    root : 2d array_like
        See above

    Notes
    -----
    The matrix is represented internally in the form Diag^{1/2}(I +
    factor * scales * factor')Diag^{1/2}, where `Diag` and `scales`
    are diagonal matrices, and `factor` is an orthogonal matrix.
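
    Examples
    --------
    A small illustrative instance (values chosen arbitrarily):

    >>> import numpy as np
    >>> diag = np.r_[1.0, 2.0]
    >>> root = np.array([[0.5], [0.5]])
    >>> m = FactoredPSDMatrix(diag, root)
    >>> m.to_matrix()
    array([[1.25, 0.25],
           [0.25, 2.25]])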

447 """ 

448 

449 def __init__(self, diag, root): 

450 self.diag = diag 

451 self.root = root 

452 root = root / np.sqrt(diag)[:, None] 

453 u, s, vt = np.linalg.svd(root, 0) 

454 self.factor = u 

455 self.scales = s**2 

456 

457 def to_matrix(self): 

458 """ 

459 Returns the PSD matrix represented by this instance as a full 

460 (square) matrix. 

461 """ 

462 return np.diag(self.diag) + np.dot(self.root, self.root.T) 

463 

    def decorrelate(self, rhs):
        """
        Decorrelate the columns of `rhs`.

        Parameters
        ----------
        rhs : array_like
            A 2 dimensional array with the same number of rows as the
            PSD matrix represented by the class instance.

        Returns
        -------
        C^{-1/2} * rhs, where C is the covariance matrix represented
        by this class instance.

        Notes
        -----
        The returned matrix has the identity matrix as its row-wise
        population covariance matrix.

        This function exploits the factor structure for efficiency.
        """

        # I + factor * qval * factor' is the inverse square root of
        # the covariance matrix in the homogeneous case where diag =
        # 1.
        qval = -1 + 1 / np.sqrt(1 + self.scales)

        # Decorrelate in the general case.
        rhs = rhs / np.sqrt(self.diag)[:, None]
        rhs1 = np.dot(self.factor.T, rhs)
        rhs1 *= qval[:, None]
        rhs1 = np.dot(self.factor, rhs1)
        rhs += rhs1

        return rhs

    def solve(self, rhs):
        """
        Solve a linear system of equations with factor-structured
        coefficients.

        Parameters
        ----------
        rhs : array_like
            A 2 dimensional array with the same number of rows as the
            PSD matrix represented by the class instance.

        Returns
        -------
        C^{-1} * rhs, where C is the covariance matrix represented
        by this class instance.

        Notes
        -----
        This function exploits the factor structure for efficiency.
        """
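
        # By the Woodbury identity, with orthonormal `factor` F and
        # diagonal `scales` S: (I + F*S*F')^{-1} = I - F*(S/(1+S))*F',
        # so C^{-1} = D^{-1/2} (I - F*(S/(1+S))*F') D^{-1/2}.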
        qval = -self.scales / (1 + self.scales)
        dr = np.sqrt(self.diag)
        rhs = rhs / dr[:, None]
        mat = qval[:, None] * np.dot(self.factor.T, rhs)
        rhs = rhs + np.dot(self.factor, mat)
        return rhs / dr[:, None]

    def logdet(self):
        """
        Returns the logarithm of the determinant of a
        factor-structured matrix.
        """
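
        # det(C) = det(Diag) * det(I + F*S*F') = prod(diag) * prod(1 + scales);
        # below, log(scales) + log(1 + 1/scales) = log(1 + scales).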
        logdet = np.sum(np.log(self.diag))
        logdet += np.sum(np.log(self.scales))
        logdet += np.sum(np.log(1 + 1 / self.scales))

        return logdet


def corr_nearest_factor(corr, rank, ctol=1e-6, lam_min=1e-30,
                        lam_max=1e30, maxiter=1000):
    """
    Find the nearest correlation matrix with factor structure to a
    given square matrix.

    Parameters
    ----------
    corr : square array
        The target matrix (to which the nearest correlation matrix is
        sought). Must be square, but need not be positive
        semidefinite.
    rank : int
        The rank of the factor structure of the solution, i.e., the
        number of linearly independent columns of X.
    ctol : positive real
        Convergence criterion.
    lam_min : float
        Tuning parameter for spectral projected gradient optimization
        (smallest allowed step in the search direction).
    lam_max : float
        Tuning parameter for spectral projected gradient optimization
        (largest allowed step in the search direction).
    maxiter : int
        Maximum number of iterations in spectral projected gradient
        optimization.

    Returns
    -------
    rslt : Bunch
        rslt.corr is a FactoredPSDMatrix defining the estimated
        correlation structure. Other fields of `rslt` contain
        returned values from spg_optim.

    Notes
    -----
    A correlation matrix has factor structure if it can be written in
    the form I + XX' - diag(XX'), where X is n x k with linearly
    independent columns, and with each row having sum of squares at
    most equal to 1. The approximation is made in terms of the
    Frobenius norm.

    This routine is useful when one has an approximate correlation
    matrix that is not positive semidefinite, and there is a need to
    estimate the inverse, square root, or inverse square root of the
    population correlation matrix. The factor structure allows these
    tasks to be done without constructing any n x n matrices.

    This is a non-convex problem with no known guaranteed globally
    convergent algorithm for computing the solution. Borsdorf, Higham
    and Raydan (2010) compared several methods for this problem and
    found the spectral projected gradient (SPG) method (used here) to
    perform best.

    The input matrix `corr` can be a dense numpy array or any scipy
    sparse matrix. The latter is useful if the input matrix is
    obtained by thresholding a very large sample correlation matrix.
    If `corr` is sparse, the calculations are optimized to save
    memory, so no working matrix with more than 10^6 elements is
    constructed.

    References
    ----------
    .. [*] R Borsdorf, N Higham, M Raydan (2010). Computing a nearest
       correlation matrix with factor structure. SIAM J Matrix Anal Appl,
       31:5, 2603-2622.
       http://eprints.ma.man.ac.uk/1523/01/covered/MIMS_ep2009_87.pdf

    Examples
    --------
    Hard thresholding a correlation matrix may result in a matrix that
    is not positive semidefinite. We can approximate a hard
    thresholded correlation matrix with a PSD matrix as follows, where
    `corr` is the input correlation matrix.

    >>> import numpy as np
    >>> from statsmodels.stats.correlation_tools import corr_nearest_factor
    >>> np.random.seed(1234)
    >>> b = 1.5 - np.random.rand(10, 1)
    >>> x = np.random.randn(100, 1).dot(b.T) + np.random.randn(100, 10)
    >>> corr = np.corrcoef(x.T)
    >>> corr = corr * (np.abs(corr) >= 0.3)
    >>> rslt = corr_nearest_factor(corr, 3)
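
    The fitted correlation matrix can be expanded to dense form if needed:

    >>> nearest_corr = rslt.corr.to_matrix()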

625 """ 

626 

627 p, _ = corr.shape 

628 

629 # Starting values (following the PCA method in BHR). 

630 u, s, vt = svds(corr, rank) 

631 X = u * np.sqrt(s) 

632 nm = np.sqrt((X**2).sum(1)) 

633 ii = np.flatnonzero(nm > 1e-5) 

634 X[ii, :] /= nm[ii][:, None] 

635 

636 # Zero the diagonal 

637 corr1 = corr.copy() 

638 if type(corr1) == np.ndarray: 

639 np.fill_diagonal(corr1, 0) 

640 elif sparse.issparse(corr1): 

641 corr1.setdiag(np.zeros(corr1.shape[0])) 

642 corr1.eliminate_zeros() 

643 corr1.sort_indices() 

644 else: 

645 raise ValueError("Matrix type not supported") 

646 

    # The gradient, from lemma 4.1 of BHR.
    def grad(X):
        gr = np.dot(X, np.dot(X.T, X))
        if isinstance(corr1, np.ndarray):
            gr -= np.dot(corr1, X)
        else:
            gr -= corr1.dot(X)
        gr -= (X*X).sum(1)[:, None] * X
        return 4*gr

    # The objective function (sum of squared deviations between fitted
    # and observed arrays).
    def func(X):
        if isinstance(corr1, np.ndarray):
            M = np.dot(X, X.T)
            np.fill_diagonal(M, 0)
            M -= corr1
            fval = (M*M).sum()
            return fval
        else:
            fval = 0.
            # Control the size of intermediates
            max_ws = 1e6
            bs = int(max_ws / X.shape[0])
            ir = 0
            while ir < X.shape[0]:
                ir2 = min(ir+bs, X.shape[0])
                u = np.dot(X[ir:ir2, :], X.T)
                ii = np.arange(u.shape[0])
                u[ii, ir+ii] = 0
                u -= np.asarray(corr1[ir:ir2, :].todense())
                fval += (u*u).sum()
                ir += bs
            return fval

    rslt = _spg_optim(func, grad, X, _project_correlation_factors, ctol=ctol,
                      lam_min=lam_min, lam_max=lam_max, maxiter=maxiter)
    root = rslt.params
    diag = 1 - (root**2).sum(1)
    soln = FactoredPSDMatrix(diag, root)
    rslt.corr = soln
    del rslt.params
    return rslt


def cov_nearest_factor_homog(cov, rank):
    """
    Approximate an arbitrary square matrix with a factor-structured
    matrix of the form k*I + XX'.

    Parameters
    ----------
    cov : array_like
        The input array, must be square but need not be positive
        semidefinite
    rank : int
        The rank of the fitted factor structure

    Returns
    -------
    A FactoredPSDMatrix instance containing the fitted matrix

    Notes
    -----
    This routine is useful if one has an estimated covariance matrix
    that is not SPD, and the ultimate goal is to estimate the inverse,
    square root, or inverse square root of the true covariance
    matrix. The factor structure allows these tasks to be performed
    without constructing any n x n matrices.

    The calculations use the fact that if k is known, then X can be
    determined from the eigen-decomposition of cov - k*I, which can
    in turn be easily obtained from the eigen-decomposition of `cov`.
    Thus the problem can be reduced to a 1-dimensional search for k
    that does not require repeated eigen-decompositions.

    If the input matrix is sparse, then cov - k*I is also sparse, so
    the eigen-decomposition can be done efficiently using sparse
    routines.

    The one-dimensional search for the optimal value of k is not
    convex, so a local minimum could be obtained.

    Examples
    --------
    Hard thresholding a covariance matrix may result in a matrix that
    is not positive semidefinite. We can approximate a hard
    thresholded covariance matrix with a PSD matrix as follows:

    >>> import numpy as np
    >>> np.random.seed(1234)
    >>> b = 1.5 - np.random.rand(10, 1)
    >>> x = np.random.randn(100, 1).dot(b.T) + np.random.randn(100, 10)
    >>> cov = np.cov(x)
    >>> cov = cov * (np.abs(cov) >= 0.3)
    >>> rslt = cov_nearest_factor_homog(cov, 3)
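
    Here `rslt` is a FactoredPSDMatrix; a dense version of the fitted
    matrix can be recovered if needed:

    >>> dense_cov = rslt.to_matrix()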

743 """ 

744 

745 m, n = cov.shape 

746 

747 Q, Lambda, _ = svds(cov, rank) 

748 

749 if sparse.issparse(cov): 

750 QSQ = np.dot(Q.T, cov.dot(Q)) 

751 ts = cov.diagonal().sum() 

752 tss = cov.dot(cov).diagonal().sum() 

753 else: 

754 QSQ = np.dot(Q.T, np.dot(cov, Q)) 

755 ts = np.trace(cov) 

756 tss = np.trace(np.dot(cov, cov)) 

757 

758 def fun(k): 

759 Lambda_t = Lambda - k 

760 v = tss + m*(k**2) + np.sum(Lambda_t**2) - 2*k*ts 

761 v += 2*k*np.sum(Lambda_t) - 2*np.sum(np.diag(QSQ) * Lambda_t) 

762 return v 

763 

764 # Get the optimal decomposition 

765 k_opt = fminbound(fun, 0, 1e5) 

766 Lambda_opt = Lambda - k_opt 

767 fac_opt = Q * np.sqrt(Lambda_opt) 

768 

769 diag = k_opt * np.ones(m, dtype=np.float64) # - (fac_opt**2).sum(1) 

770 return FactoredPSDMatrix(diag, fac_opt) 

771 

772 

def corr_thresholded(data, minabs=None, max_elt=1e7):
    r"""
    Construct a sparse matrix containing the thresholded row-wise
    correlation matrix from a data array.

    Parameters
    ----------
    data : array_like
        The data from which the row-wise thresholded correlation
        matrix is to be computed.
    minabs : non-negative real
        The threshold value; correlation coefficients smaller in
        magnitude than minabs are set to zero. If None, defaults
        to 1 / sqrt(n), see Notes for more information.
    max_elt : positive int
        The maximum number of values in any intermediate matrix
        constructed during the calculation, see Notes.

    Returns
    -------
    cormat : sparse.coo_matrix
        The thresholded correlation matrix, in COO format.

    Notes
    -----
    This is an alternative to C = np.corrcoef(data); C \*= (np.abs(C)
    >= minabs), suitable for very tall data matrices.

    If the data are jointly Gaussian, the marginal sampling
    distributions of the elements of the sample correlation matrix are
    approximately Gaussian with standard deviation 1 / sqrt(n). The
    default value of ``minabs`` is thus equal to 1 standard error, which
    will set to zero approximately 68% of the estimated correlation
    coefficients for which the population value is zero.

    No intermediate matrix with more than ``max_elt`` values will be
    constructed. However memory use could still be high if a large
    number of correlation values exceed `minabs` in magnitude.

    The thresholded matrix is returned in COO format, which can easily
    be converted to other sparse formats.

    Examples
    --------
    Here X is a tall data matrix (e.g. with 100,000 rows and 50
    columns). The row-wise correlation matrix of X is calculated
    and stored in sparse form, with all entries smaller than 0.3
    in magnitude treated as 0.

    >>> import numpy as np
    >>> np.random.seed(1234)
    >>> b = 1.5 - np.random.rand(10, 1)
    >>> x = np.random.randn(100, 1).dot(b.T) + np.random.randn(100, 10)
    >>> cmat = corr_thresholded(x, 0.3)
    """

    nrow, ncol = data.shape

    if minabs is None:
        # Default threshold of one standard error, 1 / sqrt(n) (see Notes).
        minabs = 1. / np.sqrt(ncol)

    # Row-standardize the data
    data = data.copy()
    data -= data.mean(1)[:, None]
    sd = data.std(1, ddof=1)
    ii = np.flatnonzero(sd > 1e-5)
    data[ii, :] /= sd[ii][:, None]
    ii = np.flatnonzero(sd <= 1e-5)
    data[ii, :] = 0

    # Number of rows to process in one pass
    bs = int(np.floor(max_elt / nrow))

    ipos_all, jpos_all, cor_values = [], [], []

    ir = 0
    while ir < nrow:
        ir2 = min(data.shape[0], ir + bs)
        cm = np.dot(data[ir:ir2, :], data.T) / (ncol - 1)
        cma = np.abs(cm)
        ipos, jpos = np.nonzero(cma >= minabs)
        ipos_all.append(ipos + ir)
        jpos_all.append(jpos)
        cor_values.append(cm[ipos, jpos])
        ir += bs

    ipos = np.concatenate(ipos_all)
    jpos = np.concatenate(jpos_all)
    cor_values = np.concatenate(cor_values)

    cmat = sparse.coo_matrix((cor_values, (ipos, jpos)), (nrow, nrow))

    return cmat


class MultivariateKernel:
    """
    Base class for multivariate kernels.

    An instance of MultivariateKernel implements a `call` method having
    signature `call(x, loc)`, returning the kernel weights comparing `x`
    (a 1d ndarray) to each row of `loc` (a 2d ndarray).
    """

    def call(self, x, loc):
        raise NotImplementedError

    def set_bandwidth(self, bw):
        """
        Set the bandwidth to the given vector.

        Parameters
        ----------
        bw : array_like
            A vector of non-negative bandwidth values.
        """

        self.bw = bw
        self._setup()

    def _setup(self):

        # Precompute the bandwidth product (used for normalization) and
        # the squared bandwidth values.
        self.bwk = np.prod(self.bw)
        self.bw2 = self.bw * self.bw

    def set_default_bw(self, loc, bwm=None):
        """
        Set default bandwidths based on domain values.

        Parameters
        ----------
        loc : array_like
            Values from the domain to which the kernel will
            be applied.
        bwm : scalar, optional
            A non-negative scalar that is used to multiply
            the default bandwidth.
        """
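
        # A Silverman-type rule of thumb, applied componentwise:
        # 0.9 * min(sd, IQR/1.349) * n**(-1/5).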
        sd = loc.std(0)
        q25, q75 = np.percentile(loc, [25, 75], axis=0)
        iqr = (q75 - q25) / 1.349
        bw = np.where(iqr < sd, iqr, sd)
        bw *= 0.9 / loc.shape[0] ** 0.2

        if bwm is not None:
            bw *= bwm

        # The final bandwidths
        self.bw = np.asarray(bw, dtype=np.float64)

        self._setup()


class GaussianMultivariateKernel(MultivariateKernel):
    """
    The Gaussian (squared exponential) multivariate kernel.
    """

    def call(self, x, loc):
        # Product of componentwise Gaussian kernels: exponentiate the
        # summed quadratic form, then normalize by the bandwidth product.
        return np.exp(-((x - loc)**2 / (2 * self.bw2)).sum(1)) / self.bwk


def kernel_covariance(exog, loc, groups, kernel=None, bw=None):
    """
    Use kernel averaging to estimate a multivariate covariance function.

    The goal is to estimate a covariance function C(x, y) =
    cov(Z(x), Z(y)) where x, y are vectors in R^p (e.g. representing
    locations in time or space), and Z(.) represents a multivariate
    process on R^p.

    The data used for estimation can be observed at arbitrary values of the
    position vector, and there can be multiple independent observations
    from the process.

    Parameters
    ----------
    exog : array_like
        The rows of exog are realizations of the process obtained at
        specified points.
    loc : array_like
        The rows of loc are the locations (e.g. in space or time) at
        which the rows of exog are observed.
    groups : array_like
        The values of groups are labels for distinct independent copies
        of the process.
    kernel : MultivariateKernel instance, optional
        An instance of MultivariateKernel, defaults to
        GaussianMultivariateKernel.
    bw : array_like or scalar
        A bandwidth vector, or bandwidth multiplier. If a 1d array, it
        contains kernel bandwidths for each component of the process, and
        must have length equal to the number of columns of exog. If a
        scalar, bw is a bandwidth multiplier used to adjust the default
        bandwidth; if None, a default bandwidth is used.

    Returns
    -------
    A real-valued function C(x, y) that returns an estimate of the
    covariance between values of the process located at x and y.

    References
    ----------
    .. [1] Genton M, W Kleiber (2015). Cross covariance functions for
       multivariate geostatistics. Statistical Science 30(2).
       https://arxiv.org/pdf/1507.08017.pdf
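
    Examples
    --------
    A minimal sketch with simulated data (all values illustrative):

    >>> import numpy as np
    >>> from statsmodels.stats.correlation_tools import kernel_covariance
    >>> np.random.seed(1234)
    >>> loc = np.random.uniform(size=(100, 1))
    >>> groups = np.kron(np.arange(20), np.ones(5))
    >>> exog = np.random.normal(size=(100, 3))
    >>> cov = kernel_covariance(exog, loc, groups)
    >>> v = cov(np.r_[0.5], np.r_[0.5])  # 3 x 3 covariance at x == y == 0.5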

978 """ 

979 

980 exog = np.asarray(exog) 

981 loc = np.asarray(loc) 

982 groups = np.asarray(groups) 

983 

984 if loc.ndim == 1: 

985 loc = loc[:, None] 

986 

987 v = [exog.shape[0], loc.shape[0], len(groups)] 

988 if min(v) != max(v): 

989 msg = "exog, loc, and groups must have the same number of rows" 

990 raise ValueError(msg) 

991 

992 # Map from group labels to the row indices in each group. 

993 ix = {} 

994 for i, g in enumerate(groups): 

995 if g not in ix: 

996 ix[g] = [] 

997 ix[g].append(i) 

998 for g in ix.keys(): 

999 ix[g] = np.sort(ix[g]) 

1000 

1001 if kernel is None: 

1002 kernel = GaussianMultivariateKernel() 

1003 

1004 if bw is None: 

1005 kernel.set_default_bw(loc) 

1006 elif np.isscalar(bw): 

1007 kernel.set_default_bw(loc, bwm=bw) 

1008 else: 

1009 kernel.set_bandwidth(bw) 

1010 

1011 def cov(x, y): 

1012 

1013 kx = kernel.call(x, loc) 

1014 ky = kernel.call(y, loc) 

1015 

1016 cm, cw = 0., 0. 

1017 

1018 for g, ii in ix.items(): 

1019 

1020 m = len(ii) 

1021 j1, j2 = np.indices((m, m)) 

1022 j1 = ii[j1.flat] 

1023 j2 = ii[j2.flat] 

1024 w = kx[j1] * ky[j2] 

1025 

1026 # TODO: some other form of broadcasting may be faster than 

1027 # einsum here 

1028 cm += np.einsum("ij,ik,i->jk", exog[j1, :], exog[j2, :], w) 

1029 cw += w.sum() 

1030 

1031 if cw < 1e-10: 

1032 msg = ("Effective sample size is 0. The bandwidth may be too " + 

1033 "small, or you are outside the range of your data.") 

1034 warnings.warn(msg) 

1035 return np.nan * np.ones_like(cm) 

1036 

1037 return cm / cw 

1038 

1039 return cov