
1# 

2# Author: Joris Vankerschaver 2013 

3# 

4import math 

5import numpy as np 

6from numpy import asarray_chkfinite, asarray 

7import scipy.linalg 

8from scipy._lib import doccer 

9from scipy.special import gammaln, psi, multigammaln, xlogy, entr 

10from scipy._lib._util import check_random_state 

11from scipy.linalg.blas import drot 

12from scipy.linalg.misc import LinAlgError 

13from scipy.linalg.lapack import get_lapack_funcs 

14 

15from ._discrete_distns import binom 

16from . import mvn 

17 

18__all__ = ['multivariate_normal', 

19 'matrix_normal', 

20 'dirichlet', 

21 'wishart', 

22 'invwishart', 

23 'multinomial', 

24 'special_ortho_group', 

25 'ortho_group', 

26 'random_correlation', 

27 'unitary_group'] 

28 

29_LOG_2PI = np.log(2 * np.pi) 

30_LOG_2 = np.log(2) 

31_LOG_PI = np.log(np.pi) 

32 

33 

34_doc_random_state = """\ 

35random_state : {None, int, np.random.RandomState, np.random.Generator}, optional 

36 Used for drawing random variates. 

37 If `seed` is `None` the `~np.random.RandomState` singleton is used. 

38 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

39 with seed. 

40 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

41 then that object is used. 

42 Default is None. 

43""" 

44 

45 

46def _squeeze_output(out): 

47 """ 

48 Remove single-dimensional entries from array and convert to scalar, 

49 if necessary. 

50 

51 """ 

52 out = out.squeeze() 

53 if out.ndim == 0: 

54 out = out[()] 

55 return out 

56 

57 

58def _eigvalsh_to_eps(spectrum, cond=None, rcond=None): 

59 """ 

60 Determine which eigenvalues are "small" given the spectrum. 

61 

62 This is for compatibility across various linear algebra functions 

63 that should agree about whether or not a Hermitian matrix is numerically 

64 singular and what its numerical matrix rank is. 

65 This is designed to be compatible with scipy.linalg.pinvh. 

66 

67 Parameters 

68 ---------- 

69 spectrum : 1d ndarray 

70 Array of eigenvalues of a Hermitian matrix. 

71 cond, rcond : float, optional 

72 Cutoff for small eigenvalues. 

73 Eigenvalues smaller than rcond * largest_eigenvalue are 

74 considered zero. 

75 If None or -1, suitable machine precision is used. 

76 

77 Returns 

78 ------- 

79 eps : float 

80 Magnitude cutoff for numerical negligibility. 

81 

82 """ 

83 if rcond is not None: 

84 cond = rcond 

85 if cond in [None, -1]: 

86 t = spectrum.dtype.char.lower() 

87 factor = {'f': 1E3, 'd': 1E6} 

88 cond = factor[t] * np.finfo(t).eps 

89 eps = cond * np.max(abs(spectrum)) 

90 return eps 

91 

92 

93def _pinv_1d(v, eps=1e-5): 

94 """ 

95 A helper function for computing the pseudoinverse. 

96 

97 Parameters 

98 ---------- 

99 v : iterable of numbers 

100 This may be thought of as a vector of eigenvalues or singular values. 

101 eps : float 

102 Values with magnitude no greater than eps are considered negligible. 

103 

104 Returns 

105 ------- 

106 v_pinv : 1d float ndarray 

107 A vector of pseudo-inverted numbers. 

108 

109 """ 

110 return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float) 

111 
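# Editor's illustrative sketch (not part of the SciPy source): a hypothetical
# helper showing what _pinv_1d does -- entries whose magnitude is at most
# `eps` are zeroed instead of inverted, which turns an eigenvalue spectrum
# into the spectrum of the corresponding pseudo-inverse.
def _demo_pinv_1d():
    return _pinv_1d([4.0, -2.0, 1e-12], eps=1e-5)   # array([ 0.25, -0.5 ,  0.  ])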

112 

113class _PSD(object): 

114 """ 

115 Compute coordinated functions of a symmetric positive semidefinite matrix. 

116 

117 This class addresses two issues. Firstly it allows the pseudoinverse, 

118 the logarithm of the pseudo-determinant, and the rank of the matrix 

119 to be computed using one call to eigh instead of three. 

120 Secondly it allows these functions to be computed in a way 

121 that gives mutually compatible results. 

122 All of the functions are computed with a common understanding as to 

123 which of the eigenvalues are to be considered negligibly small. 

124 The functions are designed to coordinate with scipy.linalg.pinvh() 

125 but not necessarily with np.linalg.det() or with np.linalg.matrix_rank(). 

126 

127 Parameters 

128 ---------- 

129 M : array_like 

130 Symmetric positive semidefinite matrix (2-D). 

131 cond, rcond : float, optional 

132 Cutoff for small eigenvalues. 

133 Singular values smaller than rcond * largest_eigenvalue are 

134 considered zero. 

135 If None or -1, suitable machine precision is used. 

136 lower : bool, optional 

137 Whether the pertinent array data is taken from the lower 

138 or upper triangle of M. (Default: lower) 

139 check_finite : bool, optional 

140 Whether to check that the input matrices contain only finite 

141 numbers. Disabling may give a performance gain, but may result 

142 in problems (crashes, non-termination) if the inputs do contain 

143 infinities or NaNs. 

144 allow_singular : bool, optional 

145 Whether to allow a singular matrix. (Default: True) 

146 

147 Notes 

148 ----- 

149 The arguments are similar to those of scipy.linalg.pinvh(). 

150 

151 """ 

152 

153 def __init__(self, M, cond=None, rcond=None, lower=True, 

154 check_finite=True, allow_singular=True): 

155 # Compute the symmetric eigendecomposition. 

156 # Note that eigh takes care of array conversion, chkfinite, 

157 # and assertion that the matrix is square. 

158 s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite) 

159 

160 eps = _eigvalsh_to_eps(s, cond, rcond) 

161 if np.min(s) < -eps: 

162 raise ValueError('the input matrix must be positive semidefinite') 

163 d = s[s > eps] 

164 if len(d) < len(s) and not allow_singular: 

165 raise np.linalg.LinAlgError('singular matrix') 

166 s_pinv = _pinv_1d(s, eps) 

167 U = np.multiply(u, np.sqrt(s_pinv)) 

168 

169 # Initialize the eagerly precomputed attributes. 

170 self.rank = len(d) 

171 self.U = U 

172 self.log_pdet = np.sum(np.log(d)) 

173 

174 # Initialize an attribute to be lazily computed. 

175 self._pinv = None 

176 

177 @property 

178 def pinv(self): 

179 if self._pinv is None: 

180 self._pinv = np.dot(self.U, self.U.T) 

181 return self._pinv 

182 
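# Editor's illustrative sketch (not part of the SciPy source): a hypothetical
# check of the three quantities _PSD precomputes for a rank-deficient
# covariance -- the numerical rank, the log pseudo-determinant (product of the
# nonzero eigenvalues) and a factor U with U @ U.T equal to the pseudo-inverse,
# consistent with scipy.linalg.pinvh.
def _demo_psd():
    v = np.array([1.0, 2.0])
    cov = np.outer(v, v)                 # rank-1 PSD matrix, eigenvalues {0, 5}
    psd = _PSD(cov, allow_singular=True)
    return (psd.rank == 1
            and np.isclose(psd.log_pdet, np.log(5.0))
            and np.allclose(psd.pinv, scipy.linalg.pinvh(cov)))   # True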

183 

184class multi_rv_generic(object): 

185 """ 

186 Class which encapsulates common functionality between all multivariate 

187 distributions. 

188 

189 """ 

190 def __init__(self, seed=None): 

191 super(multi_rv_generic, self).__init__() 

192 self._random_state = check_random_state(seed) 

193 

194 @property 

195 def random_state(self): 

196 """ Get or set the RandomState object for generating random variates. 

197 

198 This can be either None, int, a RandomState instance, or a 

199 np.random.Generator instance. 

200 

201 If None (or np.random), use the RandomState singleton used by 

202 np.random. 

203 If already a RandomState or Generator instance, use it. 

204 If an int, use a new RandomState instance seeded with seed. 

205 

206 """ 

207 return self._random_state 

208 

209 @random_state.setter 

210 def random_state(self, seed): 

211 self._random_state = check_random_state(seed) 

212 

213 def _get_random_state(self, random_state): 

214 if random_state is not None: 

215 return check_random_state(random_state) 

216 else: 

217 return self._random_state 

218 
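# Editor's illustrative sketch (not part of the SciPy source): a hypothetical
# demonstration of the seed handling above -- an int seed gives a dedicated,
# reproducible RandomState, while a `random_state` argument passed to
# _get_random_state overrides the instance-level generator for a single call.
def _demo_random_state_handling():
    gen = multi_rv_generic(seed=123)
    same_stream = np.allclose(gen._random_state.standard_normal(3),
                              check_random_state(123).standard_normal(3))
    per_call = gen._get_random_state(5)       # a fresh RandomState(5)
    return same_stream and per_call is not gen._random_state   # True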

219 

220class multi_rv_frozen(object): 

221 """ 

222 Class which encapsulates common functionality between all frozen 

223 multivariate distributions. 

224 """ 

225 @property 

226 def random_state(self): 

227 return self._dist._random_state 

228 

229 @random_state.setter 

230 def random_state(self, seed): 

231 self._dist._random_state = check_random_state(seed) 

232 

233 

234_mvn_doc_default_callparams = """\ 

235mean : array_like, optional 

236 Mean of the distribution (default zero) 

237cov : array_like, optional 

238 Covariance matrix of the distribution (default one) 

239allow_singular : bool, optional 

240 Whether to allow a singular covariance matrix. (Default: False) 

241""" 

242 

243_mvn_doc_callparams_note = \ 

244 """Setting the parameter `mean` to `None` is equivalent to having `mean` 

245 be the zero-vector. The parameter `cov` can be a scalar, in which case 

246 the covariance matrix is the identity times that value, a vector of 

247 diagonal entries for the covariance matrix, or a two-dimensional 

248 array_like. 

249 """ 

250 

251_mvn_doc_frozen_callparams = "" 

252 

253_mvn_doc_frozen_callparams_note = \ 

254 """See class definition for a detailed description of parameters.""" 

255 

256mvn_docdict_params = { 

257 '_mvn_doc_default_callparams': _mvn_doc_default_callparams, 

258 '_mvn_doc_callparams_note': _mvn_doc_callparams_note, 

259 '_doc_random_state': _doc_random_state 

260} 

261 

262mvn_docdict_noparams = { 

263 '_mvn_doc_default_callparams': _mvn_doc_frozen_callparams, 

264 '_mvn_doc_callparams_note': _mvn_doc_frozen_callparams_note, 

265 '_doc_random_state': _doc_random_state 

266} 

267 

268 

269class multivariate_normal_gen(multi_rv_generic): 

270 r""" 

271 A multivariate normal random variable. 

272 

273 The `mean` keyword specifies the mean. The `cov` keyword specifies the 

274 covariance matrix. 

275 

276 Methods 

277 ------- 

278 ``pdf(x, mean=None, cov=1, allow_singular=False)`` 

279 Probability density function. 

280 ``logpdf(x, mean=None, cov=1, allow_singular=False)`` 

281 Log of the probability density function. 

282 ``cdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5)`` 

283 Cumulative distribution function. 

284 ``logcdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5)`` 

285 Log of the cumulative distribution function. 

286 ``rvs(mean=None, cov=1, size=1, random_state=None)`` 

287 Draw random samples from a multivariate normal distribution. 

288 ``entropy()`` 

289 Compute the differential entropy of the multivariate normal. 

290 

291 Parameters 

292 ---------- 

293 x : array_like 

294 Quantiles, with the last axis of `x` denoting the components. 

295 %(_mvn_doc_default_callparams)s 

296 %(_doc_random_state)s 

297 

298 Alternatively, the object may be called (as a function) to fix the mean 

299 and covariance parameters, returning a "frozen" multivariate normal 

300 random variable: 

301 

302 rv = multivariate_normal(mean=None, cov=1, allow_singular=False) 

303 - Frozen object with the same methods but holding the given 

304 mean and covariance fixed. 

305 

306 Notes 

307 ----- 

308 %(_mvn_doc_callparams_note)s 

309 

310 The covariance matrix `cov` must be a (symmetric) positive 

311 semi-definite matrix. The determinant and inverse of `cov` are computed 

312 as the pseudo-determinant and pseudo-inverse, respectively, so 

313 that `cov` does not need to have full rank. 

314 

315 The probability density function for `multivariate_normal` is 

316 

317 .. math:: 

318 

319 f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}} 

320 \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right), 

321 

322 where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix, 

323 and :math:`k` is the dimension of the space where :math:`x` takes values. 

324 

325 .. versionadded:: 0.14.0 

326 

327 Examples 

328 -------- 

329 >>> import matplotlib.pyplot as plt 

330 >>> from scipy.stats import multivariate_normal 

331 

332 >>> x = np.linspace(0, 5, 10, endpoint=False) 

333 >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y 

334 array([ 0.00108914, 0.01033349, 0.05946514, 0.20755375, 0.43939129, 

335 0.56418958, 0.43939129, 0.20755375, 0.05946514, 0.01033349]) 

336 >>> fig1 = plt.figure() 

337 >>> ax = fig1.add_subplot(111) 

338 >>> ax.plot(x, y) 

339 

340 The input quantiles can be any shape of array, as long as the last 

341 axis labels the components. This allows us for instance to 

342 display the frozen pdf for a non-isotropic random variable in 2D as 

343 follows: 

344 

345 >>> x, y = np.mgrid[-1:1:.01, -1:1:.01] 

346 >>> pos = np.dstack((x, y)) 

347 >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]]) 

348 >>> fig2 = plt.figure() 

349 >>> ax2 = fig2.add_subplot(111) 

350 >>> ax2.contourf(x, y, rv.pdf(pos)) 

351 

352 """ 

353 

354 def __init__(self, seed=None): 

355 super(multivariate_normal_gen, self).__init__(seed) 

356 self.__doc__ = doccer.docformat(self.__doc__, mvn_docdict_params) 

357 

358 def __call__(self, mean=None, cov=1, allow_singular=False, seed=None): 

359 """ 

360 Create a frozen multivariate normal distribution. 

361 

362 See `multivariate_normal_frozen` for more information. 

363 

364 """ 

365 return multivariate_normal_frozen(mean, cov, 

366 allow_singular=allow_singular, 

367 seed=seed) 

368 

369 def _process_parameters(self, dim, mean, cov): 

370 """ 

371 Infer dimensionality from mean or covariance matrix, ensure that 

372 mean and covariance are full vector resp. matrix. 

373 

374 """ 

375 

376 # Try to infer dimensionality 

377 if dim is None: 

378 if mean is None: 

379 if cov is None: 

380 dim = 1 

381 else: 

382 cov = np.asarray(cov, dtype=float) 

383 if cov.ndim < 2: 

384 dim = 1 

385 else: 

386 dim = cov.shape[0] 

387 else: 

388 mean = np.asarray(mean, dtype=float) 

389 dim = mean.size 

390 else: 

391 if not np.isscalar(dim): 

392 raise ValueError("Dimension of random variable must be " 

393 "a scalar.") 

394 

395 # Check input sizes and return full arrays for mean and cov if 

396 # necessary 

397 if mean is None: 

398 mean = np.zeros(dim) 

399 mean = np.asarray(mean, dtype=float) 

400 

401 if cov is None: 

402 cov = 1.0 

403 cov = np.asarray(cov, dtype=float) 

404 

405 if dim == 1: 

406 mean.shape = (1,) 

407 cov.shape = (1, 1) 

408 

409 if mean.ndim != 1 or mean.shape[0] != dim: 

410 raise ValueError("Array 'mean' must be a vector of length %d." % 

411 dim) 

412 if cov.ndim == 0: 

413 cov = cov * np.eye(dim) 

414 elif cov.ndim == 1: 

415 cov = np.diag(cov) 

416 elif cov.ndim == 2 and cov.shape != (dim, dim): 

417 rows, cols = cov.shape 

418 if rows != cols: 

419 msg = ("Array 'cov' must be square if it is two dimensional," 

420 " but cov.shape = %s." % str(cov.shape)) 

421 else: 

422 msg = ("Dimension mismatch: array 'cov' is of shape %s," 

423 " but 'mean' is a vector of length %d.") 

424 msg = msg % (str(cov.shape), len(mean)) 

425 raise ValueError(msg) 

426 elif cov.ndim > 2: 

427 raise ValueError("Array 'cov' must be at most two-dimensional," 

428 " but cov.ndim = %d" % cov.ndim) 

429 

430 return dim, mean, cov 

431 

432 def _process_quantiles(self, x, dim): 

433 """ 

434 Adjust quantiles array so that last axis labels the components of 

435 each data point. 

436 

437 """ 

438 x = np.asarray(x, dtype=float) 

439 

440 if x.ndim == 0: 

441 x = x[np.newaxis] 

442 elif x.ndim == 1: 

443 if dim == 1: 

444 x = x[:, np.newaxis] 

445 else: 

446 x = x[np.newaxis, :] 

447 

448 return x 

449 

450 def _logpdf(self, x, mean, prec_U, log_det_cov, rank): 

451 """ 

452 Parameters 

453 ---------- 

454 x : ndarray 

455 Points at which to evaluate the log of the probability 

456 density function 

457 mean : ndarray 

458 Mean of the distribution 

459 prec_U : ndarray 

460 A decomposition such that np.dot(prec_U, prec_U.T) 

461 is the precision matrix, i.e. inverse of the covariance matrix. 

462 log_det_cov : float 

463 Logarithm of the determinant of the covariance matrix 

464 rank : int 

465 Rank of the covariance matrix. 

466 

467 Notes 

468 ----- 

469 As this function does no argument checking, it should not be 

470 called directly; use 'logpdf' instead. 

471 

472 """ 

473 dev = x - mean 

474 maha = np.sum(np.square(np.dot(dev, prec_U)), axis=-1) 

475 return -0.5 * (rank * _LOG_2PI + log_det_cov + maha) 

476 

477 def logpdf(self, x, mean=None, cov=1, allow_singular=False): 

478 """ 

479 Log of the multivariate normal probability density function. 

480 

481 Parameters 

482 ---------- 

483 x : array_like 

484 Quantiles, with the last axis of `x` denoting the components. 

485 %(_mvn_doc_default_callparams)s 

486 

487 Returns 

488 ------- 

489 pdf : ndarray or scalar 

490 Log of the probability density function evaluated at `x` 

491 

492 Notes 

493 ----- 

494 %(_mvn_doc_callparams_note)s 

495 

496 """ 

497 dim, mean, cov = self._process_parameters(None, mean, cov) 

498 x = self._process_quantiles(x, dim) 

499 psd = _PSD(cov, allow_singular=allow_singular) 

500 out = self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank) 

501 return _squeeze_output(out) 

502 

503 def pdf(self, x, mean=None, cov=1, allow_singular=False): 

504 """ 

505 Multivariate normal probability density function. 

506 

507 Parameters 

508 ---------- 

509 x : array_like 

510 Quantiles, with the last axis of `x` denoting the components. 

511 %(_mvn_doc_default_callparams)s 

512 

513 Returns 

514 ------- 

515 pdf : ndarray or scalar 

516 Probability density function evaluated at `x` 

517 

518 Notes 

519 ----- 

520 %(_mvn_doc_callparams_note)s 

521 

522 """ 

523 dim, mean, cov = self._process_parameters(None, mean, cov) 

524 x = self._process_quantiles(x, dim) 

525 psd = _PSD(cov, allow_singular=allow_singular) 

526 out = np.exp(self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank)) 

527 return _squeeze_output(out) 

528 

529 def _cdf(self, x, mean, cov, maxpts, abseps, releps): 

530 """ 

531 Parameters 

532 ---------- 

533 x : ndarray 

534 Points at which to evaluate the cumulative distribution function. 

535 mean : ndarray 

536 Mean of the distribution 

537 cov : array_like 

538 Covariance matrix of the distribution 

539 maxpts: integer 

540 The maximum number of points to use for integration 

541 abseps: float 

542 Absolute error tolerance 

543 releps: float 

544 Relative error tolerance 

545 

546 Notes 

547 ----- 

548 As this function does no argument checking, it should not be 

549 called directly; use 'cdf' instead. 

550 

551 .. versionadded:: 1.0.0 

552 

553 """ 

554 lower = np.full(mean.shape, -np.inf) 

555 # mvnun expects 1-d arguments, so process points sequentially 

556 func1d = lambda x_slice: mvn.mvnun(lower, x_slice, mean, cov, 

557 maxpts, abseps, releps)[0] 

558 out = np.apply_along_axis(func1d, -1, x) 

559 return _squeeze_output(out) 

560 

561 def logcdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None, 

562 abseps=1e-5, releps=1e-5): 

563 """ 

564 Log of the multivariate normal cumulative distribution function. 

565 

566 Parameters 

567 ---------- 

568 x : array_like 

569 Quantiles, with the last axis of `x` denoting the components. 

570 %(_mvn_doc_default_callparams)s 

571 maxpts: integer, optional 

572 The maximum number of points to use for integration 

573 (default `1000000*dim`) 

574 abseps: float, optional 

575 Absolute error tolerance (default 1e-5) 

576 releps: float, optional 

577 Relative error tolerance (default 1e-5) 

578 

579 Returns 

580 ------- 

581 cdf : ndarray or scalar 

582 Log of the cumulative distribution function evaluated at `x` 

583 

584 Notes 

585 ----- 

586 %(_mvn_doc_callparams_note)s 

587 

588 .. versionadded:: 1.0.0 

589 

590 """ 

591 dim, mean, cov = self._process_parameters(None, mean, cov) 

592 x = self._process_quantiles(x, dim) 

593 # Use _PSD to check covariance matrix 

594 _PSD(cov, allow_singular=allow_singular) 

595 if not maxpts: 

596 maxpts = 1000000 * dim 

597 out = np.log(self._cdf(x, mean, cov, maxpts, abseps, releps)) 

598 return out 

599 

600 def cdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None, 

601 abseps=1e-5, releps=1e-5): 

602 """ 

603 Multivariate normal cumulative distribution function. 

604 

605 Parameters 

606 ---------- 

607 x : array_like 

608 Quantiles, with the last axis of `x` denoting the components. 

609 %(_mvn_doc_default_callparams)s 

610 maxpts: integer, optional 

611 The maximum number of points to use for integration 

612 (default `1000000*dim`) 

613 abseps: float, optional 

614 Absolute error tolerance (default 1e-5) 

615 releps: float, optional 

616 Relative error tolerance (default 1e-5) 

617 

618 Returns 

619 ------- 

620 cdf : ndarray or scalar 

621 Cumulative distribution function evaluated at `x` 

622 

623 Notes 

624 ----- 

625 %(_mvn_doc_callparams_note)s 

626 

627 .. versionadded:: 1.0.0 

628 

629 """ 

630 dim, mean, cov = self._process_parameters(None, mean, cov) 

631 x = self._process_quantiles(x, dim) 

632 # Use _PSD to check covariance matrix 

633 _PSD(cov, allow_singular=allow_singular) 

634 if not maxpts: 

635 maxpts = 1000000 * dim 

636 out = self._cdf(x, mean, cov, maxpts, abseps, releps) 

637 return out 

638 

639 def rvs(self, mean=None, cov=1, size=1, random_state=None): 

640 """ 

641 Draw random samples from a multivariate normal distribution. 

642 

643 Parameters 

644 ---------- 

645 %(_mvn_doc_default_callparams)s 

646 size : integer, optional 

647 Number of samples to draw (default 1). 

648 %(_doc_random_state)s 

649 

650 Returns 

651 ------- 

652 rvs : ndarray or scalar 

653 Random variates of size (`size`, `N`), where `N` is the 

654 dimension of the random variable. 

655 

656 Notes 

657 ----- 

658 %(_mvn_doc_callparams_note)s 

659 

660 """ 

661 dim, mean, cov = self._process_parameters(None, mean, cov) 

662 

663 random_state = self._get_random_state(random_state) 

664 out = random_state.multivariate_normal(mean, cov, size) 

665 return _squeeze_output(out) 

666 

667 def entropy(self, mean=None, cov=1): 

668 """ 

669 Compute the differential entropy of the multivariate normal. 

670 

671 Parameters 

672 ---------- 

673 %(_mvn_doc_default_callparams)s 

674 

675 Returns 

676 ------- 

677 h : scalar 

678 Entropy of the multivariate normal distribution 

679 

680 Notes 

681 ----- 

682 %(_mvn_doc_callparams_note)s 

683 

684 """ 

685 dim, mean, cov = self._process_parameters(None, mean, cov) 

686 _, logdet = np.linalg.slogdet(2 * np.pi * np.e * cov) 

687 return 0.5 * logdet 

688 

689 

690multivariate_normal = multivariate_normal_gen() 

691 
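# Editor's illustrative sketch (not part of the SciPy source): a hypothetical
# check of the `cov` handling described in the callparams note -- a scalar, a
# vector of diagonal entries, and the corresponding 2-D matrix all describe
# the same distribution.
def _demo_mvn_cov_forms():
    x = [0.5, -0.2]
    p_scalar = multivariate_normal.pdf(x, mean=[0, 0], cov=2.0)
    p_diag = multivariate_normal.pdf(x, mean=[0, 0], cov=[2.0, 2.0])
    p_full = multivariate_normal.pdf(x, mean=[0, 0], cov=2.0 * np.eye(2))
    return np.allclose([p_scalar, p_diag], p_full)   # True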

692 

693class multivariate_normal_frozen(multi_rv_frozen): 

694 def __init__(self, mean=None, cov=1, allow_singular=False, seed=None, 

695 maxpts=None, abseps=1e-5, releps=1e-5): 

696 """ 

697 Create a frozen multivariate normal distribution. 

698 

699 Parameters 

700 ---------- 

701 mean : array_like, optional 

702 Mean of the distribution (default zero) 

703 cov : array_like, optional 

704 Covariance matrix of the distribution (default one) 

705 allow_singular : bool, optional 

706 If this flag is True then tolerate a singular 

707 covariance matrix (default False). 

708 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional 

709 This parameter defines the object to use for drawing random 

710 variates. 

711 If `seed` is `None` the `~np.random.RandomState` singleton is used. 

712 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

713 with seed. 

714 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

715 then that object is used. 

716 Default is None. 

717 maxpts: integer, optional 

718 The maximum number of points to use for integration of the 

719 cumulative distribution function (default `1000000*dim`) 

720 abseps: float, optional 

721 Absolute error tolerance for the cumulative distribution function 

722 (default 1e-5) 

723 releps: float, optional 

724 Relative error tolerance for the cumulative distribution function 

725 (default 1e-5) 

726 

727 Examples 

728 -------- 

729 When called with the default parameters, this will create a 1D random 

730 variable with mean 0 and covariance 1: 

731 

732 >>> from scipy.stats import multivariate_normal 

733 >>> r = multivariate_normal() 

734 >>> r.mean 

735 array([ 0.]) 

736 >>> r.cov 

737 array([[1.]]) 

738 

739 """ 

740 self._dist = multivariate_normal_gen(seed) 

741 self.dim, self.mean, self.cov = self._dist._process_parameters( 

742 None, mean, cov) 

743 self.cov_info = _PSD(self.cov, allow_singular=allow_singular) 

744 if not maxpts: 

745 maxpts = 1000000 * self.dim 

746 self.maxpts = maxpts 

747 self.abseps = abseps 

748 self.releps = releps 

749 

750 def logpdf(self, x): 

751 x = self._dist._process_quantiles(x, self.dim) 

752 out = self._dist._logpdf(x, self.mean, self.cov_info.U, 

753 self.cov_info.log_pdet, self.cov_info.rank) 

754 return _squeeze_output(out) 

755 

756 def pdf(self, x): 

757 return np.exp(self.logpdf(x)) 

758 

759 def logcdf(self, x): 

760 return np.log(self.cdf(x)) 

761 

762 def cdf(self, x): 

763 x = self._dist._process_quantiles(x, self.dim) 

764 out = self._dist._cdf(x, self.mean, self.cov, self.maxpts, self.abseps, 

765 self.releps) 

766 return _squeeze_output(out) 

767 

768 def rvs(self, size=1, random_state=None): 

769 return self._dist.rvs(self.mean, self.cov, size, random_state) 

770 

771 def entropy(self): 

772 """ 

773 Computes the differential entropy of the multivariate normal. 

774 

775 Returns 

776 ------- 

777 h : scalar 

778 Entropy of the multivariate normal distribution 

779 

780 """ 

781 log_pdet = self.cov_info.log_pdet 

782 rank = self.cov_info.rank 

783 return 0.5 * (rank * (_LOG_2PI + 1) + log_pdet) 

784 
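# Editor's illustrative sketch (not part of the SciPy source): a hypothetical
# consistency check -- for a full-rank covariance the closed form used by the
# frozen entropy above, 0.5*(k*(log(2*pi) + 1) + log det(Sigma)), agrees with
# the slogdet-based expression used by multivariate_normal.entropy().
def _demo_mvn_entropy_consistency():
    cov = np.array([[2.0, 0.3], [0.3, 0.5]])
    frozen = multivariate_normal(mean=[0, 0], cov=cov)
    return np.isclose(frozen.entropy(),
                      multivariate_normal.entropy([0, 0], cov))   # True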

785 

786# Set frozen generator docstrings from corresponding docstrings in 

787# multivariate_normal_gen and fill in default strings in class docstrings 

788for name in ['logpdf', 'pdf', 'logcdf', 'cdf', 'rvs']: 

789 method = multivariate_normal_gen.__dict__[name] 

790 method_frozen = multivariate_normal_frozen.__dict__[name] 

791 method_frozen.__doc__ = doccer.docformat(method.__doc__, 

792 mvn_docdict_noparams) 

793 method.__doc__ = doccer.docformat(method.__doc__, mvn_docdict_params) 

794 

795_matnorm_doc_default_callparams = """\ 

796mean : array_like, optional 

797 Mean of the distribution (default: `None`) 

798rowcov : array_like, optional 

799 Among-row covariance matrix of the distribution (default: `1`) 

800colcov : array_like, optional 

801 Among-column covariance matrix of the distribution (default: `1`) 

802""" 

803 

804_matnorm_doc_callparams_note = \ 

805 """If `mean` is set to `None` then a matrix of zeros is used for the mean. 

806 The dimensions of this matrix are inferred from the shape of `rowcov` and 

807 `colcov`, if these are provided, or set to `1` if ambiguous. 

808 

809 `rowcov` and `colcov` can be two-dimensional array_likes specifying the 

810 covariance matrices directly. Alternatively, a one-dimensional array will 

811 be interpreted as the entries of a diagonal matrix, and a scalar or 

812 zero-dimensional array will be interpreted as this value times the 

813 identity matrix. 

814 """ 

815 

816_matnorm_doc_frozen_callparams = "" 

817 

818_matnorm_doc_frozen_callparams_note = \ 

819 """See class definition for a detailed description of parameters.""" 

820 

821matnorm_docdict_params = { 

822 '_matnorm_doc_default_callparams': _matnorm_doc_default_callparams, 

823 '_matnorm_doc_callparams_note': _matnorm_doc_callparams_note, 

824 '_doc_random_state': _doc_random_state 

825} 

826 

827matnorm_docdict_noparams = { 

828 '_matnorm_doc_default_callparams': _matnorm_doc_frozen_callparams, 

829 '_matnorm_doc_callparams_note': _matnorm_doc_frozen_callparams_note, 

830 '_doc_random_state': _doc_random_state 

831} 

832 

833 

834class matrix_normal_gen(multi_rv_generic): 

835 r""" 

836 A matrix normal random variable. 

837 

838 The `mean` keyword specifies the mean. The `rowcov` keyword specifies the 

839 among-row covariance matrix. The 'colcov' keyword specifies the 

840 among-column covariance matrix. 

841 

842 Methods 

843 ------- 

844 ``pdf(X, mean=None, rowcov=1, colcov=1)`` 

845 Probability density function. 

846 ``logpdf(X, mean=None, rowcov=1, colcov=1)`` 

847 Log of the probability density function. 

848 ``rvs(mean=None, rowcov=1, colcov=1, size=1, random_state=None)`` 

849 Draw random samples. 

850 

851 Parameters 

852 ---------- 

853 X : array_like 

854 Quantiles, with the last two axes of `X` denoting the components. 

855 %(_matnorm_doc_default_callparams)s 

856 %(_doc_random_state)s 

857 

858 Alternatively, the object may be called (as a function) to fix the mean 

859 and covariance parameters, returning a "frozen" matrix normal 

860 random variable: 

861 

862 rv = matrix_normal(mean=None, rowcov=1, colcov=1) 

863 - Frozen object with the same methods but holding the given 

864 mean and covariance fixed. 

865 

866 Notes 

867 ----- 

868 %(_matnorm_doc_callparams_note)s 

869 

870 The covariance matrices specified by `rowcov` and `colcov` must be 

871 (symmetric) positive definite. If the samples in `X` are 

872 :math:`m \times n`, then `rowcov` must be :math:`m \times m` and 

873 `colcov` must be :math:`n \times n`. `mean` must be the same shape as `X`. 

874 

875 The probability density function for `matrix_normal` is 

876 

877 .. math:: 

878 

879 f(X) = (2 \pi)^{-\frac{mn}{2}}|U|^{-\frac{n}{2}} |V|^{-\frac{m}{2}} 

880 \exp\left( -\frac{1}{2} \mathrm{Tr}\left[ U^{-1} (X-M) V^{-1} 

881 (X-M)^T \right] \right), 

882 

883 where :math:`M` is the mean, :math:`U` the among-row covariance matrix, 

884 :math:`V` the among-column covariance matrix. 

885 

886 The `allow_singular` behaviour of the `multivariate_normal` 

887 distribution is not currently supported. Covariance matrices must be 

888 full rank. 

889 

890 The `matrix_normal` distribution is closely related to the 

891 `multivariate_normal` distribution. Specifically, :math:`\mathrm{Vec}(X)` 

892 (the vector formed by concatenating the columns of :math:`X`) has a 

893 multivariate normal distribution with mean :math:`\mathrm{Vec}(M)` 

894 and covariance :math:`V \otimes U` (where :math:`\otimes` is the Kronecker 

895 product). Sampling and pdf evaluation are 

896 :math:`\mathcal{O}(m^3 + n^3 + m^2 n + m n^2)` for the matrix normal, but 

897 :math:`\mathcal{O}(m^3 n^3)` for the equivalent multivariate normal, 

898 making this equivalent form algorithmically inefficient. 

899 

900 .. versionadded:: 0.17.0 

901 

902 Examples 

903 -------- 

904 

905 >>> from scipy.stats import matrix_normal 

906 

907 >>> M = np.arange(6).reshape(3,2); M 

908 array([[0, 1], 

909 [2, 3], 

910 [4, 5]]) 

911 >>> U = np.diag([1,2,3]); U 

912 array([[1, 0, 0], 

913 [0, 2, 0], 

914 [0, 0, 3]]) 

915 >>> V = 0.3*np.identity(2); V 

916 array([[ 0.3, 0. ], 

917 [ 0. , 0.3]]) 

918 >>> X = M + 0.1; X 

919 array([[ 0.1, 1.1], 

920 [ 2.1, 3.1], 

921 [ 4.1, 5.1]]) 

922 >>> matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V) 

923 0.023410202050005054 

924 

925 >>> # Equivalent multivariate normal 

926 >>> from scipy.stats import multivariate_normal 

927 >>> vectorised_X = X.T.flatten() 

928 >>> equiv_mean = M.T.flatten() 

929 >>> equiv_cov = np.kron(V,U) 

930 >>> multivariate_normal.pdf(vectorised_X, mean=equiv_mean, cov=equiv_cov) 

931 0.023410202050005054 

932 """ 

933 

934 def __init__(self, seed=None): 

935 super(matrix_normal_gen, self).__init__(seed) 

936 self.__doc__ = doccer.docformat(self.__doc__, matnorm_docdict_params) 

937 

938 def __call__(self, mean=None, rowcov=1, colcov=1, seed=None): 

939 """ 

940 Create a frozen matrix normal distribution. 

941 

942 See `matrix_normal_frozen` for more information. 

943 

944 """ 

945 return matrix_normal_frozen(mean, rowcov, colcov, seed=seed) 

946 

947 def _process_parameters(self, mean, rowcov, colcov): 

948 """ 

949 Infer dimensionality from mean or covariance matrices. Handle 

950 defaults. Ensure compatible dimensions. 

951 

952 """ 

953 

954 # Process mean 

955 if mean is not None: 

956 mean = np.asarray(mean, dtype=float) 

957 meanshape = mean.shape 

958 if len(meanshape) != 2: 

959 raise ValueError("Array `mean` must be two dimensional.") 

960 if 0 in meanshape: 

961 raise ValueError("Array `mean` has invalid shape.") 

962 

963 # Process among-row covariance 

964 rowcov = np.asarray(rowcov, dtype=float) 

965 if rowcov.ndim == 0: 

966 if mean is not None: 

967 rowcov = rowcov * np.identity(meanshape[0]) 

968 else: 

969 rowcov = rowcov * np.identity(1) 

970 elif rowcov.ndim == 1: 

971 rowcov = np.diag(rowcov) 

972 rowshape = rowcov.shape 

973 if len(rowshape) != 2: 

974 raise ValueError("`rowcov` must be a scalar or a 2D array.") 

975 if rowshape[0] != rowshape[1]: 

976 raise ValueError("Array `rowcov` must be square.") 

977 if rowshape[0] == 0: 

978 raise ValueError("Array `rowcov` has invalid shape.") 

979 numrows = rowshape[0] 

980 

981 # Process among-column covariance 

982 colcov = np.asarray(colcov, dtype=float) 

983 if colcov.ndim == 0: 

984 if mean is not None: 

985 colcov = colcov * np.identity(meanshape[1]) 

986 else: 

987 colcov = colcov * np.identity(1) 

988 elif colcov.ndim == 1: 

989 colcov = np.diag(colcov) 

990 colshape = colcov.shape 

991 if len(colshape) != 2: 

992 raise ValueError("`colcov` must be a scalar or a 2D array.") 

993 if colshape[0] != colshape[1]: 

994 raise ValueError("Array `colcov` must be square.") 

995 if colshape[0] == 0: 

996 raise ValueError("Array `colcov` has invalid shape.") 

997 numcols = colshape[0] 

998 

999 # Ensure mean and covariances compatible 

1000 if mean is not None: 

1001 if meanshape[0] != numrows: 

1002 raise ValueError("Arrays `mean` and `rowcov` must have the " 

1003 "same number of rows.") 

1004 if meanshape[1] != numcols: 

1005 raise ValueError("Arrays `mean` and `colcov` must have the " 

1006 "same number of columns.") 

1007 else: 

1008 mean = np.zeros((numrows, numcols)) 

1009 

1010 dims = (numrows, numcols) 

1011 

1012 return dims, mean, rowcov, colcov 

1013 

1014 def _process_quantiles(self, X, dims): 

1015 """ 

1016 Adjust quantiles array so that the last two axes label the components of 

1017 each data point. 

1018 

1019 """ 

1020 X = np.asarray(X, dtype=float) 

1021 if X.ndim == 2: 

1022 X = X[np.newaxis, :] 

1023 if X.shape[-2:] != dims: 

1024 raise ValueError("The shape of array `X` is not compatible " 

1025 "with the distribution parameters.") 

1026 return X 

1027 

1028 def _logpdf(self, dims, X, mean, row_prec_rt, log_det_rowcov, 

1029 col_prec_rt, log_det_colcov): 

1030 """ 

1031 Parameters 

1032 ---------- 

1033 dims : tuple 

1034 Dimensions of the matrix variates 

1035 X : ndarray 

1036 Points at which to evaluate the log of the probability 

1037 density function 

1038 mean : ndarray 

1039 Mean of the distribution 

1040 row_prec_rt : ndarray 

1041 A decomposition such that np.dot(row_prec_rt, row_prec_rt.T) 

1042 is the inverse of the among-row covariance matrix 

1043 log_det_rowcov : float 

1044 Logarithm of the determinant of the among-row covariance matrix 

1045 col_prec_rt : ndarray 

1046 A decomposition such that np.dot(col_prec_rt, col_prec_rt.T) 

1047 is the inverse of the among-column covariance matrix 

1048 log_det_colcov : float 

1049 Logarithm of the determinant of the among-column covariance matrix 

1050 

1051 Notes 

1052 ----- 

1053 As this function does no argument checking, it should not be 

1054 called directly; use 'logpdf' instead. 

1055 

1056 """ 

1057 numrows, numcols = dims 

1058 roll_dev = np.rollaxis(X-mean, axis=-1, start=0) 

1059 scale_dev = np.tensordot(col_prec_rt.T, 

1060 np.dot(roll_dev, row_prec_rt), 1) 

1061 maha = np.sum(np.sum(np.square(scale_dev), axis=-1), axis=0) 

1062 return -0.5 * (numrows*numcols*_LOG_2PI + numcols*log_det_rowcov 

1063 + numrows*log_det_colcov + maha) 

1064 

1065 def logpdf(self, X, mean=None, rowcov=1, colcov=1): 

1066 """ 

1067 Log of the matrix normal probability density function. 

1068 

1069 Parameters 

1070 ---------- 

1071 X : array_like 

1072 Quantiles, with the last two axes of `X` denoting the components. 

1073 %(_matnorm_doc_default_callparams)s 

1074 

1075 Returns 

1076 ------- 

1077 logpdf : ndarray 

1078 Log of the probability density function evaluated at `X` 

1079 

1080 Notes 

1081 ----- 

1082 %(_matnorm_doc_callparams_note)s 

1083 

1084 """ 

1085 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov, 

1086 colcov) 

1087 X = self._process_quantiles(X, dims) 

1088 rowpsd = _PSD(rowcov, allow_singular=False) 

1089 colpsd = _PSD(colcov, allow_singular=False) 

1090 out = self._logpdf(dims, X, mean, rowpsd.U, rowpsd.log_pdet, colpsd.U, 

1091 colpsd.log_pdet) 

1092 return _squeeze_output(out) 

1093 

1094 def pdf(self, X, mean=None, rowcov=1, colcov=1): 

1095 """ 

1096 Matrix normal probability density function. 

1097 

1098 Parameters 

1099 ---------- 

1100 X : array_like 

1101 Quantiles, with the last two axes of `X` denoting the components. 

1102 %(_matnorm_doc_default_callparams)s 

1103 

1104 Returns 

1105 ------- 

1106 pdf : ndarray 

1107 Probability density function evaluated at `X` 

1108 

1109 Notes 

1110 ----- 

1111 %(_matnorm_doc_callparams_note)s 

1112 

1113 """ 

1114 return np.exp(self.logpdf(X, mean, rowcov, colcov)) 

1115 

1116 def rvs(self, mean=None, rowcov=1, colcov=1, size=1, random_state=None): 

1117 """ 

1118 Draw random samples from a matrix normal distribution. 

1119 

1120 Parameters 

1121 ---------- 

1122 %(_matnorm_doc_default_callparams)s 

1123 size : integer, optional 

1124 Number of samples to draw (default 1). 

1125 %(_doc_random_state)s 

1126 

1127 Returns 

1128 ------- 

1129 rvs : ndarray or scalar 

1130 Random variates of size (`size`, `dims`), where `dims` is the 

1131 dimension of the random matrices. 

1132 

1133 Notes 

1134 ----- 

1135 %(_matnorm_doc_callparams_note)s 

1136 

1137 """ 

1138 size = int(size) 

1139 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov, 

1140 colcov) 

1141 rowchol = scipy.linalg.cholesky(rowcov, lower=True) 

1142 colchol = scipy.linalg.cholesky(colcov, lower=True) 

1143 random_state = self._get_random_state(random_state) 

1144 std_norm = random_state.standard_normal(size=(dims[1], size, dims[0])) 

1145 roll_rvs = np.tensordot(colchol, np.dot(std_norm, rowchol.T), 1) 

1146 out = np.rollaxis(roll_rvs.T, axis=1, start=0) + mean[np.newaxis, :, :] 

1147 if size == 1: 

1148 out = out.reshape(mean.shape) 

1149 return out 

1150 

1151 

1152matrix_normal = matrix_normal_gen() 

1153 
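# Editor's illustrative sketch (not part of the SciPy source): a hypothetical
# check of the Kronecker relationship described in the matrix_normal notes --
# with a single row (rowcov the 1x1 matrix [[u]]) the matrix normal reduces to
# an ordinary multivariate normal with covariance u * colcov.
def _demo_matrix_normal_single_row():
    u, V = 2.0, np.array([[1.0, 0.3], [0.3, 0.5]])
    X = np.array([[0.1, -0.4]])
    lp_mat = matrix_normal.logpdf(X, mean=np.zeros((1, 2)),
                                  rowcov=[[u]], colcov=V)
    lp_mvn = multivariate_normal.logpdf(X.ravel(), mean=np.zeros(2), cov=u * V)
    return np.isclose(lp_mat, lp_mvn)   # True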

1154 

1155class matrix_normal_frozen(multi_rv_frozen): 

1156 def __init__(self, mean=None, rowcov=1, colcov=1, seed=None): 

1157 """ 

1158 Create a frozen matrix normal distribution. 

1159 

1160 Parameters 

1161 ---------- 

1162 %(_matnorm_doc_default_callparams)s 

1163 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional 

1164 This parameter defines the object to use for drawing random 

1165 variates. 

1166 If `seed` is `None` the `~np.random.RandomState` singleton is used. 

1167 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

1168 with seed. 

1169 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

1170 then that object is used. 

1171 Default is None. 

1172 

1173 Examples 

1174 -------- 

1175 >>> from scipy.stats import matrix_normal 

1176 

1177 >>> distn = matrix_normal(mean=np.zeros((3,3))) 

1178 >>> X = distn.rvs(); X 

1179 array([[-0.02976962, 0.93339138, -0.09663178], 

1180 [ 0.67405524, 0.28250467, -0.93308929], 

1181 [-0.31144782, 0.74535536, 1.30412916]]) 

1182 >>> distn.pdf(X) 

1183 2.5160642368346784e-05 

1184 >>> distn.logpdf(X) 

1185 -10.590229595124615 

1186 """ 

1187 self._dist = matrix_normal_gen(seed) 

1188 self.dims, self.mean, self.rowcov, self.colcov = \ 

1189 self._dist._process_parameters(mean, rowcov, colcov) 

1190 self.rowpsd = _PSD(self.rowcov, allow_singular=False) 

1191 self.colpsd = _PSD(self.colcov, allow_singular=False) 

1192 

1193 def logpdf(self, X): 

1194 X = self._dist._process_quantiles(X, self.dims) 

1195 out = self._dist._logpdf(self.dims, X, self.mean, self.rowpsd.U, 

1196 self.rowpsd.log_pdet, self.colpsd.U, 

1197 self.colpsd.log_pdet) 

1198 return _squeeze_output(out) 

1199 

1200 def pdf(self, X): 

1201 return np.exp(self.logpdf(X)) 

1202 

1203 def rvs(self, size=1, random_state=None): 

1204 return self._dist.rvs(self.mean, self.rowcov, self.colcov, size, 

1205 random_state) 

1206 

1207 

1208# Set frozen generator docstrings from corresponding docstrings in 

1209# matrix_normal_gen and fill in default strings in class docstrings 

1210for name in ['logpdf', 'pdf', 'rvs']: 

1211 method = matrix_normal_gen.__dict__[name] 

1212 method_frozen = matrix_normal_frozen.__dict__[name] 

1213 method_frozen.__doc__ = doccer.docformat(method.__doc__, 

1214 matnorm_docdict_noparams) 

1215 method.__doc__ = doccer.docformat(method.__doc__, matnorm_docdict_params) 

1216 

1217_dirichlet_doc_default_callparams = """\ 

1218alpha : array_like 

1219 The concentration parameters. The number of entries determines the 

1220 dimensionality of the distribution. 

1221""" 

1222_dirichlet_doc_frozen_callparams = "" 

1223 

1224_dirichlet_doc_frozen_callparams_note = \ 

1225 """See class definition for a detailed description of parameters.""" 

1226 

1227dirichlet_docdict_params = { 

1228 '_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams, 

1229 '_doc_random_state': _doc_random_state 

1230} 

1231 

1232dirichlet_docdict_noparams = { 

1233 '_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams, 

1234 '_doc_random_state': _doc_random_state 

1235} 

1236 

1237 

1238def _dirichlet_check_parameters(alpha): 

1239 alpha = np.asarray(alpha) 

1240 if np.min(alpha) <= 0: 

1241 raise ValueError("All parameters must be greater than 0") 

1242 elif alpha.ndim != 1: 

1243 raise ValueError("Parameter vector 'alpha' must be one dimensional, " 

1244 "but alpha.shape = %s." % (alpha.shape, )) 

1245 return alpha 

1246 

1247 

1248def _dirichlet_check_input(alpha, x): 

1249 x = np.asarray(x) 

1250 

1251 if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]: 

1252 raise ValueError("Vector 'x' must have either the same number " 

1253 "of entries as, or one entry fewer than, " 

1254 "parameter vector 'a', but alpha.shape = %s " 

1255 "and x.shape = %s." % (alpha.shape, x.shape)) 

1256 

1257 if x.shape[0] != alpha.shape[0]: 

1258 xk = np.array([1 - np.sum(x, 0)]) 

1259 if xk.ndim == 1: 

1260 x = np.append(x, xk) 

1261 elif xk.ndim == 2: 

1262 x = np.vstack((x, xk)) 

1263 else: 

1264 raise ValueError("The input must be one dimensional or a two " 

1265 "dimensional matrix containing the entries.") 

1266 

1267 if np.min(x) < 0: 

1268 raise ValueError("Each entry in 'x' must be greater than or equal " 

1269 "to zero.") 

1270 

1271 if np.max(x) > 1: 

1272 raise ValueError("Each entry in 'x' must be less than or equal to one.") 

1273 

1274 # Check x_i > 0 or alpha_i > 1 

1275 xeq0 = (x == 0) 

1276 alphalt1 = (alpha < 1) 

1277 if x.shape != alpha.shape: 

1278 alphalt1 = np.repeat(alphalt1, x.shape[-1], axis=-1).reshape(x.shape) 

1279 chk = np.logical_and(xeq0, alphalt1) 

1280 

1281 if np.sum(chk): 

1282 raise ValueError("Each entry in 'x' must be greater than zero if its " 

1283 "alpha is less than one.") 

1284 

1285 if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any(): 

1286 raise ValueError("The input vector 'x' must lie within the normal " 

1287 "simplex. but np.sum(x, 0) = %s." % np.sum(x, 0)) 

1288 

1289 return x 

1290 
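# Editor's illustrative sketch (not part of the SciPy source): a hypothetical
# demonstration of the input completion above -- quantiles may omit the last
# component, which is then filled in as 1 - sum(x) so the point lies on the
# simplex.
def _demo_dirichlet_input_completion():
    alpha = np.array([0.4, 5.0, 15.0])
    full = _dirichlet_check_input(alpha, np.array([0.2, 0.2, 0.6]))
    short = _dirichlet_check_input(alpha, np.array([0.2, 0.2]))  # last entry implied
    return np.allclose(full, short)   # True: both are [0.2, 0.2, 0.6]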

1291 

1292def _lnB(alpha): 

1293 r""" 

1294 Internal helper function to compute the log of the useful quotient 

1295 

1296 .. math:: 

1297 

1298 B(\alpha) = \frac{\prod_{i=1}^{K}\Gamma(\alpha_i)} 

1299 {\Gamma\left(\sum_{i=1}^{K} \alpha_i \right)} 

1300 

1301 Parameters 

1302 ---------- 

1303 %(_dirichlet_doc_default_callparams)s 

1304 

1305 Returns 

1306 ------- 

1307 lnB : scalar 

1308 Log of the helper quotient, internal use only 

1309 

1310 """ 

1311 return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha)) 

1312 

1313 

1314class dirichlet_gen(multi_rv_generic): 

1315 r""" 

1316 A Dirichlet random variable. 

1317 

1318 The `alpha` keyword specifies the concentration parameters of the 

1319 distribution. 

1320 

1321 .. versionadded:: 0.15.0 

1322 

1323 Methods 

1324 ------- 

1325 ``pdf(x, alpha)`` 

1326 Probability density function. 

1327 ``logpdf(x, alpha)`` 

1328 Log of the probability density function. 

1329 ``rvs(alpha, size=1, random_state=None)`` 

1330 Draw random samples from a Dirichlet distribution. 

1331 ``mean(alpha)`` 

1332 The mean of the Dirichlet distribution 

1333 ``var(alpha)`` 

1334 The variance of the Dirichlet distribution 

1335 ``entropy(alpha)`` 

1336 Compute the differential entropy of the Dirichlet distribution. 

1337 

1338 Parameters 

1339 ---------- 

1340 x : array_like 

1341 Quantiles, with the last axis of `x` denoting the components. 

1342 %(_dirichlet_doc_default_callparams)s 

1343 %(_doc_random_state)s 

1344 

1345 Alternatively, the object may be called (as a function) to fix 

1346 concentration parameters, returning a "frozen" Dirichlet 

1347 random variable: 

1348 

1349 rv = dirichlet(alpha) 

1350 - Frozen object with the same methods but holding the given 

1351 concentration parameters fixed. 

1352 

1353 Notes 

1354 ----- 

1355 Each :math:`\alpha` entry must be positive. The distribution has support 

1356 only on the simplex defined by 

1357 

1358 .. math:: 

1359 \sum_{i=1}^{K} x_i \le 1 

1360 

1361 

1362 The probability density function for `dirichlet` is 

1363 

1364 .. math:: 

1365 

1366 f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1} 

1367 

1368 where 

1369 

1370 .. math:: 

1371 

1372 \mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)} 

1373 {\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)} 

1374 

1375 and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)` is the vector of 

1376 concentration parameters, and :math:`K` is the dimension of the space 

1377 where :math:`x` takes values. 

1378 

1379 Note that the dirichlet interface is somewhat inconsistent. 

1380 The array returned by the rvs function is transposed 

1381 with respect to the format expected by the pdf and logpdf. 

1382 

1383 Examples 

1384 -------- 

1385 >>> from scipy.stats import dirichlet 

1386 

1387 Generate a dirichlet random variable 

1388 

1389 >>> quantiles = np.array([0.2, 0.2, 0.6]) # specify quantiles 

1390 >>> alpha = np.array([0.4, 5, 15]) # specify concentration parameters 

1391 >>> dirichlet.pdf(quantiles, alpha) 

1392 0.2843831684937255 

1393 

1394 The same PDF but following a log scale 

1395 

1396 >>> dirichlet.logpdf(quantiles, alpha) 

1397 -1.2574327653159187 

1398 

1399 Once we specify the dirichlet distribution 

1400 we can then calculate quantities of interest 

1401 

1402 >>> dirichlet.mean(alpha) # get the mean of the distribution 

1403 array([0.01960784, 0.24509804, 0.73529412]) 

1404 >>> dirichlet.var(alpha) # get variance 

1405 array([0.00089829, 0.00864603, 0.00909517]) 

1406 >>> dirichlet.entropy(alpha) # calculate the differential entropy 

1407 -4.3280162474082715 

1408 

1409 We can also return random samples from the distribution 

1410 

1411 >>> dirichlet.rvs(alpha, size=1, random_state=1) 

1412 array([[0.00766178, 0.24670518, 0.74563305]]) 

1413 >>> dirichlet.rvs(alpha, size=2, random_state=2) 

1414 array([[0.01639427, 0.1292273 , 0.85437844], 

1415 [0.00156917, 0.19033695, 0.80809388]]) 

1416 

1417 """ 

1418 

1419 def __init__(self, seed=None): 

1420 super(dirichlet_gen, self).__init__(seed) 

1421 self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params) 

1422 

1423 def __call__(self, alpha, seed=None): 

1424 return dirichlet_frozen(alpha, seed=seed) 

1425 

1426 def _logpdf(self, x, alpha): 

1427 """ 

1428 Parameters 

1429 ---------- 

1430 x : ndarray 

1431 Points at which to evaluate the log of the probability 

1432 density function 

1433 %(_dirichlet_doc_default_callparams)s 

1434 

1435 Notes 

1436 ----- 

1437 As this function does no argument checking, it should not be 

1438 called directly; use 'logpdf' instead. 

1439 

1440 """ 

1441 lnB = _lnB(alpha) 

1442 return - lnB + np.sum((xlogy(alpha - 1, x.T)).T, 0) 

1443 

1444 def logpdf(self, x, alpha): 

1445 """ 

1446 Log of the Dirichlet probability density function. 

1447 

1448 Parameters 

1449 ---------- 

1450 x : array_like 

1451 Quantiles, with the last axis of `x` denoting the components. 

1452 %(_dirichlet_doc_default_callparams)s 

1453 

1454 Returns 

1455 ------- 

1456 pdf : ndarray or scalar 

1457 Log of the probability density function evaluated at `x`. 

1458 

1459 """ 

1460 alpha = _dirichlet_check_parameters(alpha) 

1461 x = _dirichlet_check_input(alpha, x) 

1462 

1463 out = self._logpdf(x, alpha) 

1464 return _squeeze_output(out) 

1465 

1466 def pdf(self, x, alpha): 

1467 """ 

1468 The Dirichlet probability density function. 

1469 

1470 Parameters 

1471 ---------- 

1472 x : array_like 

1473 Quantiles, with the last axis of `x` denoting the components. 

1474 %(_dirichlet_doc_default_callparams)s 

1475 

1476 Returns 

1477 ------- 

1478 pdf : ndarray or scalar 

1479 The probability density function evaluated at `x`. 

1480 

1481 """ 

1482 alpha = _dirichlet_check_parameters(alpha) 

1483 x = _dirichlet_check_input(alpha, x) 

1484 

1485 out = np.exp(self._logpdf(x, alpha)) 

1486 return _squeeze_output(out) 

1487 

1488 def mean(self, alpha): 

1489 """ 

1490 Compute the mean of the dirichlet distribution. 

1491 

1492 Parameters 

1493 ---------- 

1494 %(_dirichlet_doc_default_callparams)s 

1495 

1496 Returns 

1497 ------- 

1498 mu : ndarray or scalar 

1499 Mean of the Dirichlet distribution. 

1500 

1501 """ 

1502 alpha = _dirichlet_check_parameters(alpha) 

1503 

1504 out = alpha / (np.sum(alpha)) 

1505 return _squeeze_output(out) 

1506 

1507 def var(self, alpha): 

1508 """ 

1509 Compute the variance of the dirichlet distribution. 

1510 

1511 Parameters 

1512 ---------- 

1513 %(_dirichlet_doc_default_callparams)s 

1514 

1515 Returns 

1516 ------- 

1517 v : ndarray or scalar 

1518 Variance of the Dirichlet distribution. 

1519 

1520 """ 

1521 

1522 alpha = _dirichlet_check_parameters(alpha) 

1523 

1524 alpha0 = np.sum(alpha) 

1525 out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1)) 

1526 return _squeeze_output(out) 

1527 

1528 def entropy(self, alpha): 

1529 """ 

1530 Compute the differential entropy of the dirichlet distribution. 

1531 

1532 Parameters 

1533 ---------- 

1534 %(_dirichlet_doc_default_callparams)s 

1535 

1536 Returns 

1537 ------- 

1538 h : scalar 

1539 Entropy of the Dirichlet distribution 

1540 

1541 """ 

1542 

1543 alpha = _dirichlet_check_parameters(alpha) 

1544 

1545 alpha0 = np.sum(alpha) 

1546 lnB = _lnB(alpha) 

1547 K = alpha.shape[0] 

1548 

1549 out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum( 

1550 (alpha - 1) * scipy.special.psi(alpha)) 

1551 return _squeeze_output(out) 

1552 

1553 def rvs(self, alpha, size=1, random_state=None): 

1554 """ 

1555 Draw random samples from a Dirichlet distribution. 

1556 

1557 Parameters 

1558 ---------- 

1559 %(_dirichlet_doc_default_callparams)s 

1560 size : int, optional 

1561 Number of samples to draw (default 1). 

1562 %(_doc_random_state)s 

1563 

1564 Returns 

1565 ------- 

1566 rvs : ndarray or scalar 

1567 Random variates of size (`size`, `N`), where `N` is the 

1568 dimension of the random variable. 

1569 

1570 """ 

1571 alpha = _dirichlet_check_parameters(alpha) 

1572 random_state = self._get_random_state(random_state) 

1573 return random_state.dirichlet(alpha, size=size) 

1574 

1575 

1576dirichlet = dirichlet_gen() 

1577 
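# Editor's illustrative sketch (not part of the SciPy source): a hypothetical
# demonstration of the interface quirk noted in the dirichlet docstring --
# rvs returns samples with shape (size, K), while pdf/logpdf expect the K
# components along the first axis, so a transpose is needed in between.
def _demo_dirichlet_rvs_vs_pdf():
    alpha = np.array([0.4, 5.0, 15.0])
    sample = dirichlet.rvs(alpha, size=4, random_state=1)    # shape (4, 3)
    densities = dirichlet.pdf(sample.T, alpha)               # shape (4,)
    return sample.shape == (4, 3) and densities.shape == (4,)   # True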

1578 

1579class dirichlet_frozen(multi_rv_frozen): 

1580 def __init__(self, alpha, seed=None): 

1581 self.alpha = _dirichlet_check_parameters(alpha) 

1582 self._dist = dirichlet_gen(seed) 

1583 

1584 def logpdf(self, x): 

1585 return self._dist.logpdf(x, self.alpha) 

1586 

1587 def pdf(self, x): 

1588 return self._dist.pdf(x, self.alpha) 

1589 

1590 def mean(self): 

1591 return self._dist.mean(self.alpha) 

1592 

1593 def var(self): 

1594 return self._dist.var(self.alpha) 

1595 

1596 def entropy(self): 

1597 return self._dist.entropy(self.alpha) 

1598 

1599 def rvs(self, size=1, random_state=None): 

1600 return self._dist.rvs(self.alpha, size, random_state) 

1601 

1602 

1603# Set frozen generator docstrings from corresponding docstrings in 

1604# multivariate_normal_gen and fill in default strings in class docstrings 

1605for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'entropy']: 

1606 method = dirichlet_gen.__dict__[name] 

1607 method_frozen = dirichlet_frozen.__dict__[name] 

1608 method_frozen.__doc__ = doccer.docformat( 

1609 method.__doc__, dirichlet_docdict_noparams) 

1610 method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params) 

1611 

1612 

1613_wishart_doc_default_callparams = """\ 

1614df : int 

1615 Degrees of freedom, must be greater than or equal to the dimension of the 

1616 scale matrix 

1617scale : array_like 

1618 Symmetric positive definite scale matrix of the distribution 

1619""" 

1620 

1621_wishart_doc_callparams_note = "" 

1622 

1623_wishart_doc_frozen_callparams = "" 

1624 

1625_wishart_doc_frozen_callparams_note = \ 

1626 """See class definition for a detailed description of parameters.""" 

1627 

1628wishart_docdict_params = { 

1629 '_doc_default_callparams': _wishart_doc_default_callparams, 

1630 '_doc_callparams_note': _wishart_doc_callparams_note, 

1631 '_doc_random_state': _doc_random_state 

1632} 

1633 

1634wishart_docdict_noparams = { 

1635 '_doc_default_callparams': _wishart_doc_frozen_callparams, 

1636 '_doc_callparams_note': _wishart_doc_frozen_callparams_note, 

1637 '_doc_random_state': _doc_random_state 

1638} 
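# Editor's illustrative sketch (not part of the SciPy source): a hypothetical
# check of the one-dimensional case -- a W_1(df, sigma2) Wishart variate is
# sigma2 times a chi-square variate with df degrees of freedom, generalising
# the W_1(df, 1) == chi2(df) identity noted in the wishart docstring below.
def _demo_wishart_1d_vs_chi2():
    from scipy.stats import wishart, chi2
    x, df, sigma2 = 3.0, 5, 2.0
    lp_w = wishart.logpdf(x, df=df, scale=sigma2)
    lp_c = chi2.logpdf(x / sigma2, df) - np.log(sigma2)   # change of variables
    return np.isclose(lp_w, lp_c)   # True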

1639 

1640 

1641class wishart_gen(multi_rv_generic): 

1642 r""" 

1643 A Wishart random variable. 

1644 

1645 The `df` keyword specifies the degrees of freedom. The `scale` keyword 

1646 specifies the scale matrix, which must be symmetric and positive definite. 

1647 In this context, the scale matrix is often interpreted in terms of a 

1648 multivariate normal precision matrix (the inverse of the covariance 

1649 matrix). 

1650 

1651 Methods 

1652 ------- 

1653 ``pdf(x, df, scale)`` 

1654 Probability density function. 

1655 ``logpdf(x, df, scale)`` 

1656 Log of the probability density function. 

1657 ``rvs(df, scale, size=1, random_state=None)`` 

1658 Draw random samples from a Wishart distribution. 

1659 ``entropy()`` 

1660 Compute the differential entropy of the Wishart distribution. 

1661 

1662 Parameters 

1663 ---------- 

1664 x : array_like 

1665 Quantiles, with the last axis of `x` denoting the components. 

1666 %(_doc_default_callparams)s 

1667 %(_doc_random_state)s 

1668 

1669 Alternatively, the object may be called (as a function) to fix the degrees 

1670 of freedom and scale parameters, returning a "frozen" Wishart random 

1671 variable: 

1672 

1673 rv = wishart(df=1, scale=1) 

1674 - Frozen object with the same methods but holding the given 

1675 degrees of freedom and scale fixed. 

1676 

1677 See Also 

1678 -------- 

1679 invwishart, chi2 

1680 

1681 Notes 

1682 ----- 

1683 %(_doc_callparams_note)s 

1684 

1685 The scale matrix `scale` must be a symmetric positive definite 

1686 matrix. Singular matrices, including the symmetric positive semi-definite 

1687 case, are not supported. 

1688 

1689 The Wishart distribution is often denoted 

1690 

1691 .. math:: 

1692 

1693 W_p(\nu, \Sigma) 

1694 

1695 where :math:`\nu` is the degrees of freedom and :math:`\Sigma` is the 

1696 :math:`p \times p` scale matrix. 

1697 

1698 The probability density function for `wishart` has support over positive 

1699 definite matrices :math:`S`; if :math:`S \sim W_p(\nu, \Sigma)`, then 

1700 its PDF is given by: 

1701 

1702 .. math:: 

1703 

1704 f(S) = \frac{|S|^{\frac{\nu - p - 1}{2}}}{2^{ \frac{\nu p}{2} } 

1705 |\Sigma|^\frac{\nu}{2} \Gamma_p \left ( \frac{\nu}{2} \right )} 

1706 \exp\left( -tr(\Sigma^{-1} S) / 2 \right) 

1707 

1708 If :math:`S \sim W_p(\nu, \Sigma)` (Wishart) then 

1709 :math:`S^{-1} \sim W_p^{-1}(\nu, \Sigma^{-1})` (inverse Wishart). 

1710 

1711 If the scale matrix is 1-dimensional and equal to one, then the Wishart 

1712 distribution :math:`W_1(\nu, 1)` collapses to the :math:`\chi^2(\nu)` 

1713 distribution. 

1714 

1715 .. versionadded:: 0.16.0 

1716 

1717 References 

1718 ---------- 

1719 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach", 

1720 Wiley, 1983. 

1721 .. [2] W.B. Smith and R.R. Hocking, "Algorithm AS 53: Wishart Variate 

1722 Generator", Applied Statistics, vol. 21, pp. 341-345, 1972. 

1723 

1724 Examples 

1725 -------- 

1726 >>> import matplotlib.pyplot as plt 

1727 >>> from scipy.stats import wishart, chi2 

1728 >>> x = np.linspace(1e-5, 8, 100) 

1729 >>> w = wishart.pdf(x, df=3, scale=1); w[:5] 

1730 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ]) 

1731 >>> c = chi2.pdf(x, 3); c[:5] 

1732 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ]) 

1733 >>> plt.plot(x, w) 

1734 

1735 The input quantiles can be any shape of array, as long as the last 

1736 axis labels the components. 

1737 

1738 """ 

1739 

1740 def __init__(self, seed=None): 

1741 super(wishart_gen, self).__init__(seed) 

1742 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params) 

1743 

1744 def __call__(self, df=None, scale=None, seed=None): 

1745 """ 

1746 Create a frozen Wishart distribution. 

1747 

1748 See `wishart_frozen` for more information. 

1749 

1750 """ 

1751 return wishart_frozen(df, scale, seed) 

1752 

1753 def _process_parameters(self, df, scale): 

1754 if scale is None: 

1755 scale = 1.0 

1756 scale = np.asarray(scale, dtype=float) 

1757 

1758 if scale.ndim == 0: 

1759 scale = scale[np.newaxis, np.newaxis] 

1760 elif scale.ndim == 1: 

1761 scale = np.diag(scale) 

1762 elif scale.ndim == 2 and not scale.shape[0] == scale.shape[1]: 

1763 raise ValueError("Array 'scale' must be square if it is two" 

1764 " dimensional, but scale.shape = %s." 

1765 % str(scale.shape)) 

1766 elif scale.ndim > 2: 

1767 raise ValueError("Array 'scale' must be at most two-dimensional," 

1768 " but scale.ndim = %d" % scale.ndim) 

1769 

1770 dim = scale.shape[0] 

1771 

1772 if df is None: 

1773 df = dim 

1774 elif not np.isscalar(df): 

1775 raise ValueError("Degrees of freedom must be a scalar.") 

1776 elif df < dim: 

1777 raise ValueError("Degrees of freedom cannot be less than dimension" 

1778 " of scale matrix, but df = %d" % df) 

1779 

1780 return dim, df, scale 

1781 

1782 def _process_quantiles(self, x, dim): 

1783 """ 

1784 Adjust quantiles array so that last axis labels the components of 

1785 each data point. 

1786 """ 

1787 x = np.asarray(x, dtype=float) 

1788 

1789 if x.ndim == 0: 

1790 x = x * np.eye(dim)[:, :, np.newaxis] 

1791 if x.ndim == 1: 

1792 if dim == 1: 

1793 x = x[np.newaxis, np.newaxis, :] 

1794 else: 

1795 x = np.diag(x)[:, :, np.newaxis] 

1796 elif x.ndim == 2: 

1797 if not x.shape[0] == x.shape[1]: 

1798 raise ValueError("Quantiles must be square if they are two" 

1799 " dimensional, but x.shape = %s." 

1800 % str(x.shape)) 

1801 x = x[:, :, np.newaxis] 

1802 elif x.ndim == 3: 

1803 if not x.shape[0] == x.shape[1]: 

1804 raise ValueError("Quantiles must be square in the first two" 

1805 " dimensions if they are three dimensional" 

1806 ", but x.shape = %s." % str(x.shape)) 

1807 elif x.ndim > 3: 

1808 raise ValueError("Quantiles must be at most two-dimensional with" 

1809 " an additional dimension for multiple" 

1810 " components, but x.ndim = %d" % x.ndim) 

1811 

1812 # Now we have 3-dim array; should have shape [dim, dim, *] 

1813 if not x.shape[0:2] == (dim, dim): 

1814 raise ValueError('Quantiles have incompatible dimensions: should' 

1815 ' be %s, got %s.' % ((dim, dim), x.shape[0:2])) 

1816 

1817 return x 

1818 

1819 def _process_size(self, size): 

1820 size = np.asarray(size) 

1821 

1822 if size.ndim == 0: 

1823 size = size[np.newaxis] 

1824 elif size.ndim > 1: 

1825 raise ValueError('Size must be an integer or tuple of integers;' 

1826 ' thus must have dimension <= 1.' 

1827 ' Got size = %s' % str(tuple(size))) 

1828 n = size.prod() 

1829 shape = tuple(size) 

1830 

1831 return n, shape 

1832 

1833 def _logpdf(self, x, dim, df, scale, log_det_scale, C): 

1834 """ 

1835 Parameters 

1836 ---------- 

1837 x : ndarray 

1838 Points at which to evaluate the log of the probability 

1839 density function 

1840 dim : int 

1841 Dimension of the scale matrix 

1842 df : int 

1843 Degrees of freedom 

1844 scale : ndarray 

1845 Scale matrix 

1846 log_det_scale : float 

1847 Logarithm of the determinant of the scale matrix 

1848 C : ndarray 

1849 Cholesky factorization of the scale matrix, lower triangular. 

1850 

1851 Notes 

1852 ----- 

1853 As this function does no argument checking, it should not be 

1854 called directly; use 'logpdf' instead. 

1855 

1856 """ 

1857 # log determinant of x 

1858 # Note: x has components along the last axis, so that x.T has 

1859 # components along the 0-th axis. Then since det(A) = det(A'), this 

1860 # gives us a 1-dim vector of determinants 

1861 

1862 # Retrieve tr(scale^{-1} x) 

1863 log_det_x = np.zeros(x.shape[-1]) 

1864 scale_inv_x = np.zeros(x.shape) 

1865 tr_scale_inv_x = np.zeros(x.shape[-1]) 

1866 for i in range(x.shape[-1]): 

1867 _, log_det_x[i] = self._cholesky_logdet(x[:, :, i]) 

1868 scale_inv_x[:, :, i] = scipy.linalg.cho_solve((C, True), x[:, :, i]) 

1869 tr_scale_inv_x[i] = scale_inv_x[:, :, i].trace() 

1870 

1871 # Log PDF 

1872 out = ((0.5 * (df - dim - 1) * log_det_x - 0.5 * tr_scale_inv_x) - 

1873 (0.5 * df * dim * _LOG_2 + 0.5 * df * log_det_scale + 

1874 multigammaln(0.5*df, dim))) 

1875 

1876 return out 

1877 

1878 def logpdf(self, x, df, scale): 

1879 """ 

1880 Log of the Wishart probability density function. 

1881 

1882 Parameters 

1883 ---------- 

1884 x : array_like 

1885 Quantiles, with the last axis of `x` denoting the components. 

1886 Each quantile must be a symmetric positive definite matrix. 

1887 %(_doc_default_callparams)s 

1888 

1889 Returns 

1890 ------- 

1891 pdf : ndarray 

1892 Log of the probability density function evaluated at `x` 

1893 

1894 Notes 

1895 ----- 

1896 %(_doc_callparams_note)s 

1897 

1898 """ 

1899 dim, df, scale = self._process_parameters(df, scale) 

1900 x = self._process_quantiles(x, dim) 

1901 

1902 # Cholesky decomposition of scale, get log(det(scale)) 

1903 C, log_det_scale = self._cholesky_logdet(scale) 

1904 

1905 out = self._logpdf(x, dim, df, scale, log_det_scale, C) 

1906 return _squeeze_output(out) 

1907 

1908 def pdf(self, x, df, scale): 

1909 """ 

1910 Wishart probability density function. 

1911 

1912 Parameters 

1913 ---------- 

1914 x : array_like 

1915 Quantiles, with the last axis of `x` denoting the components. 

1916 Each quantile must be a symmetric positive definite matrix. 

1917 %(_doc_default_callparams)s 

1918 

1919 Returns 

1920 ------- 

1921 pdf : ndarray 

1922 Probability density function evaluated at `x` 

1923 

1924 Notes 

1925 ----- 

1926 %(_doc_callparams_note)s 

1927 

1928 """ 

1929 return np.exp(self.logpdf(x, df, scale)) 

1930 

1931 def _mean(self, dim, df, scale): 

1932 """ 

1933 Parameters 

1934 ---------- 

1935 dim : int 

1936 Dimension of the scale matrix 

1937 %(_doc_default_callparams)s 

1938 

1939 Notes 

1940 ----- 

1941 As this function does no argument checking, it should not be 

1942 called directly; use 'mean' instead. 

1943 

1944 """ 

1945 return df * scale 

1946 

1947 def mean(self, df, scale): 

1948 """ 

1949 Mean of the Wishart distribution 

1950 

1951 Parameters 

1952 ---------- 

1953 %(_doc_default_callparams)s 

1954 

1955 Returns 

1956 ------- 

1957 mean : float 

1958 The mean of the distribution 

1959 """ 

1960 dim, df, scale = self._process_parameters(df, scale) 

1961 out = self._mean(dim, df, scale) 

1962 return _squeeze_output(out) 

1963 

1964 def _mode(self, dim, df, scale): 

1965 """ 

1966 Parameters 

1967 ---------- 

1968 dim : int 

1969 Dimension of the scale matrix 

1970 %(_doc_default_callparams)s 

1971 

1972 Notes 

1973 ----- 

1974 As this function does no argument checking, it should not be 

1975 called directly; use 'mode' instead. 

1976 

1977 """ 

1978 if df >= dim + 1: 

1979 out = (df-dim-1) * scale 

1980 else: 

1981 out = None 

1982 return out 

1983 

1984 def mode(self, df, scale): 

1985 """ 

1986 Mode of the Wishart distribution 

1987 

1988 Only valid if the degrees of freedom are greater than the dimension of 

1989 the scale matrix. 

1990 

1991 Parameters 

1992 ---------- 

1993 %(_doc_default_callparams)s 

1994 

1995 Returns 

1996 ------- 

1997 mode : float or None 

1998 The Mode of the distribution 

1999 """ 

2000 dim, df, scale = self._process_parameters(df, scale) 

2001 out = self._mode(dim, df, scale) 

2002 return _squeeze_output(out) if out is not None else out 

2003 

2004 def _var(self, dim, df, scale): 

2005 """ 

2006 Parameters 

2007 ---------- 

2008 dim : int 

2009 Dimension of the scale matrix 

2010 %(_doc_default_callparams)s 

2011 

2012 Notes 

2013 ----- 

2014 As this function does no argument checking, it should not be 

2015 called directly; use 'var' instead. 

2016 

2017 """ 

2018 var = scale**2 

2019 diag = scale.diagonal() # 1 x dim array 

2020 var += np.outer(diag, diag) 

2021 var *= df 

2022 return var 

2023 

2024 def var(self, df, scale): 

2025 """ 

2026 Variance of the Wishart distribution 

2027 

2028 Parameters 

2029 ---------- 

2030 %(_doc_default_callparams)s 

2031 

2032 Returns 

2033 ------- 

2034 var : float 

2035 The variance of the distribution 

2036 """ 

2037 dim, df, scale = self._process_parameters(df, scale) 

2038 out = self._var(dim, df, scale) 

2039 return _squeeze_output(out) 

2040 

2041 def _standard_rvs(self, n, shape, dim, df, random_state): 

2042 """ 

2043 Parameters 

2044 ---------- 

2045 n : integer 

2046 Number of variates to generate 

2047 shape : iterable 

2048 Shape of the variates to generate 

2049 dim : int 

2050 Dimension of the scale matrix 

2051 df : int 

2052 Degrees of freedom 

2053 random_state : {`~np.random.RandomState`, `~np.random.Generator`} 

2054 Object used for drawing the random variates. 

2055 

2056 Notes 

2057 ----- 

2058 As this function does no argument checking, it should not be 

2059 called directly; use 'rvs' instead. 

2060 

2061 """ 

2062 # Random normal variates for off-diagonal elements 

2063 n_tril = dim * (dim-1) // 2 

2064 covariances = random_state.normal( 

2065 size=n*n_tril).reshape(shape+(n_tril,)) 

2066 

2067 # Random chi-square variates for diagonal elements 

2068 variances = (np.r_[[random_state.chisquare(df-(i+1)+1, size=n)**0.5 

2069 for i in range(dim)]].reshape((dim,) + 

2070 shape[::-1]).T) 

2071 

2072 # Create the A matri(ces) - lower triangular 

2073 A = np.zeros(shape + (dim, dim)) 

2074 

2075 # Input the covariances 

2076 size_idx = tuple([slice(None, None, None)]*len(shape)) 

2077 tril_idx = np.tril_indices(dim, k=-1) 

2078 A[size_idx + tril_idx] = covariances 

2079 

2080 # Input the variances 

2081 diag_idx = np.diag_indices(dim) 

2082 A[size_idx + diag_idx] = variances 

2083 

2084 return A 

2085 

2086 def _rvs(self, n, shape, dim, df, C, random_state): 

2087 """ 

2088 Parameters 

2089 ---------- 

2090 n : integer 

2091 Number of variates to generate 

2092 shape : iterable 

2093 Shape of the variates to generate 

2094 dim : int 

2095 Dimension of the scale matrix 

2096 df : int 

2097 Degrees of freedom 

2098 scale : ndarray 

2099 Scale matrix 

2100 C : ndarray 

2101 Cholesky factorization of the scale matrix, lower triangular. 

2102 %(_doc_random_state)s 

2103 

2104 Notes 

2105 ----- 

2106 As this function does no argument checking, it should not be 

2107 called directly; use 'rvs' instead. 

2108 

2109 """ 

2110 random_state = self._get_random_state(random_state) 

2111 # Calculate the matrices A, which are actually lower triangular 

2112 # Cholesky factorizations of a matrix B such that B ~ W(df, I) 

2113 A = self._standard_rvs(n, shape, dim, df, random_state) 

2114 

2115 # Calculate SA = C A A' C', where SA ~ W(df, scale) 

2116 # Note: this is the product of a (lower) (lower) (lower)' (lower)' 

2117 # or, denoting B = AA', it is C B C' where C is the lower 

2118 # triangular Cholesky factorization of the scale matrix. 

2119 # this appears to conflict with the instructions in [1]_, which 

2120 # suggest that it should be D' B D where D is the lower 

2121 # triangular factorization of the scale matrix. However, it is 

2122 # meant to refer to the Bartlett (1933) representation of a 

2123 # Wishart random variate as L A A' L' where L is lower triangular 

2124 # so it appears that understanding D' to be upper triangular 

2125 # is either a typo in or misreading of [1]_. 

2126 for index in np.ndindex(shape): 

2127 CA = np.dot(C, A[index]) 

2128 A[index] = np.dot(CA, CA.T) 

2129 

2130 return A 

2131 

2132 def rvs(self, df, scale, size=1, random_state=None): 

2133 """ 

2134 Draw random samples from a Wishart distribution. 

2135 

2136 Parameters 

2137 ---------- 

2138 %(_doc_default_callparams)s 

2139 size : integer or iterable of integers, optional 

2140 Number of samples to draw (default 1). 

2141 %(_doc_random_state)s 

2142 

2143 Returns 

2144 ------- 

2145 rvs : ndarray 

2146 Random variates of shape (`size`) + (`dim`, `dim`), where `dim` is 

2147 the dimension of the scale matrix. 

2148 

2149 Notes 

2150 ----- 

2151 %(_doc_callparams_note)s 

2152 

2153 """ 

2154 n, shape = self._process_size(size) 

2155 dim, df, scale = self._process_parameters(df, scale) 

2156 

2157 # Cholesky decomposition of scale 

2158 C = scipy.linalg.cholesky(scale, lower=True) 

2159 

2160 out = self._rvs(n, shape, dim, df, C, random_state) 

2161 

2162 return _squeeze_output(out) 

2163 

2164 def _entropy(self, dim, df, log_det_scale): 

2165 """ 

2166 Parameters 

2167 ---------- 

2168 dim : int 

2169 Dimension of the scale matrix 

2170 df : int 

2171 Degrees of freedom 

2172 log_det_scale : float 

2173 Logarithm of the determinant of the scale matrix 

2174 

2175 Notes 

2176 ----- 

2177 As this function does no argument checking, it should not be 

2178 called directly; use 'entropy' instead. 

2179 

2180 """ 

2181 return ( 

2182 0.5 * (dim+1) * log_det_scale + 

2183 0.5 * dim * (dim+1) * _LOG_2 + 

2184 multigammaln(0.5*df, dim) - 

2185 0.5 * (df - dim - 1) * np.sum( 

2186 [psi(0.5*(df + 1 - (i+1))) for i in range(dim)] 

2187 ) + 

2188 0.5 * df * dim 

2189 ) 

2190 

2191 def entropy(self, df, scale): 

2192 """ 

2193 Compute the differential entropy of the Wishart. 

2194 

2195 Parameters 

2196 ---------- 

2197 %(_doc_default_callparams)s 

2198 

2199 Returns 

2200 ------- 

2201 h : scalar 

2202 Entropy of the Wishart distribution 

2203 

2204 Notes 

2205 ----- 

2206 %(_doc_callparams_note)s 

2207 

2208 """ 

2209 dim, df, scale = self._process_parameters(df, scale) 

2210 _, log_det_scale = self._cholesky_logdet(scale) 

2211 return self._entropy(dim, df, log_det_scale) 

2212 

2213 def _cholesky_logdet(self, scale): 

2214 """ 

2215 Compute the Cholesky decomposition and determine log(det(scale)). 

2216 

2217 Parameters 

2218 ---------- 

2219 scale : ndarray 

2220 Scale matrix. 

2221 

2222 Returns 

2223 ------- 

2224 c_decomp : ndarray 

2225 The Cholesky decomposition of `scale`. 

2226 logdet : scalar 

2227 The log of the determinant of `scale`. 

2228 

2229 Notes 

2230 ----- 

2231 This computation of ``logdet`` is equivalent to 

2232 ``np.linalg.slogdet(scale)``. It is ~2x faster though. 

2233 

2234 """ 

2235 c_decomp = scipy.linalg.cholesky(scale, lower=True) 

2236 logdet = 2 * np.sum(np.log(c_decomp.diagonal())) 

2237 return c_decomp, logdet 

2238 

2239 

2240wishart = wishart_gen() 
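# Illustrative sketch: a hypothetical helper (not part of the distribution
# code above) exercising three facts stated in the `wishart` docstring and in
# the private `_cholesky_logdet` helper: W_1(df, 1) has the chi2(df) density,
# E[S] = df * scale, and the Cholesky-based log-determinant agrees with
# ``np.linalg.slogdet``.
def _wishart_usage_sketch():
    from scipy.stats import chi2

    # 1-d scale of one: the Wishart pdf collapses to the chi-squared pdf.
    x = np.linspace(1e-5, 8, 5)
    assert np.allclose(wishart.pdf(x, df=3, scale=1), chi2.pdf(x, 3))

    # Mean of W_p(df, scale) is df * scale.
    scale = np.array([[2.0, 0.3], [0.3, 1.0]])
    assert np.allclose(wishart.mean(df=5, scale=scale), 5 * scale)

    # The Cholesky route to log(det(scale)) matches slogdet.
    _, log_det = wishart._cholesky_logdet(scale)
    assert np.allclose(log_det, np.linalg.slogdet(scale)[1])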

2241 

2242 

2243class wishart_frozen(multi_rv_frozen): 

2244 """ 

2245 Create a frozen Wishart distribution. 

2246 

2247 Parameters 

2248 ---------- 

2249 df : array_like 

2250 Degrees of freedom of the distribution 

2251 scale : array_like 

2252 Scale matrix of the distribution 

2253 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional 

2254 This parameter defines the object to use for drawing random variates. 

2255 If `seed` is `None` the `~np.random.RandomState` singleton is used. 

2256 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

2257 with seed. 

2258 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

2259 then that object is used. 

2260 Default is None. 

2261 

2262 """ 

2263 def __init__(self, df, scale, seed=None): 

2264 self._dist = wishart_gen(seed) 

2265 self.dim, self.df, self.scale = self._dist._process_parameters( 

2266 df, scale) 

2267 self.C, self.log_det_scale = self._dist._cholesky_logdet(self.scale) 

2268 

2269 def logpdf(self, x): 

2270 x = self._dist._process_quantiles(x, self.dim) 

2271 

2272 out = self._dist._logpdf(x, self.dim, self.df, self.scale, 

2273 self.log_det_scale, self.C) 

2274 return _squeeze_output(out) 

2275 

2276 def pdf(self, x): 

2277 return np.exp(self.logpdf(x)) 

2278 

2279 def mean(self): 

2280 out = self._dist._mean(self.dim, self.df, self.scale) 

2281 return _squeeze_output(out) 

2282 

2283 def mode(self): 

2284 out = self._dist._mode(self.dim, self.df, self.scale) 

2285 return _squeeze_output(out) if out is not None else out 

2286 

2287 def var(self): 

2288 out = self._dist._var(self.dim, self.df, self.scale) 

2289 return _squeeze_output(out) 

2290 

2291 def rvs(self, size=1, random_state=None): 

2292 n, shape = self._dist._process_size(size) 

2293 out = self._dist._rvs(n, shape, self.dim, self.df, 

2294 self.C, random_state) 

2295 return _squeeze_output(out) 

2296 

2297 def entropy(self): 

2298 return self._dist._entropy(self.dim, self.df, self.log_det_scale) 

2299 

2300 

2301# Set frozen generator docstrings from corresponding docstrings in 

2302# Wishart and fill in default strings in class docstrings 

2303for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs', 'entropy']: 

2304 method = wishart_gen.__dict__[name] 

2305 method_frozen = wishart_frozen.__dict__[name] 

2306 method_frozen.__doc__ = doccer.docformat( 

2307 method.__doc__, wishart_docdict_noparams) 

2308 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params) 
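# A small usage sketch (hypothetical helper, illustration only) of the frozen
# interface wired up by the loop above: calling `wishart(...)` returns a
# `wishart_frozen` object whose methods take no `df`/`scale` arguments and
# agree with the corresponding unfrozen calls.
def _frozen_wishart_sketch():
    scale = np.eye(2)
    rv = wishart(df=5, scale=scale)
    assert np.allclose(rv.mean(), 5 * scale)
    assert np.allclose(rv.logpdf(scale),
                       wishart.logpdf(scale, df=5, scale=scale))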

2309 

2310 

2311def _cho_inv_batch(a, check_finite=True): 

2312 """ 

2313 Invert the matrices a_i, using a Cholesky factorization of A, where 

2314 a_i resides in the last two dimensions of a and the other indices describe 

2315 the index i. 

2316 

2317 Overwrites the data in a. 

2318 

2319 Parameters 

2320 ---------- 

2321 a : array 

2322 Array of matrices to invert, where the matrices themselves are stored 

2323 in the last two dimensions. 

2324 check_finite : bool, optional 

2325 Whether to check that the input matrices contain only finite numbers. 

2326 Disabling may give a performance gain, but may result in problems 

2327 (crashes, non-termination) if the inputs do contain infinities or NaNs. 

2328 

2329 Returns 

2330 ------- 

2331 x : array 

2332 Array of inverses of the matrices ``a_i``. 

2333 

2334 See also 

2335 -------- 

2336 scipy.linalg.cholesky : Cholesky factorization of a matrix 

2337 

2338 """ 

2339 if check_finite: 

2340 a1 = asarray_chkfinite(a) 

2341 else: 

2342 a1 = asarray(a) 

2343 if len(a1.shape) < 2 or a1.shape[-2] != a1.shape[-1]: 

2344 raise ValueError('expected square matrix in last two dimensions') 

2345 

2346 potrf, potri = get_lapack_funcs(('potrf', 'potri'), (a1,)) 

2347 

2348 triu_rows, triu_cols = np.triu_indices(a.shape[-2], k=1) 

2349 for index in np.ndindex(a1.shape[:-2]): 

2350 

2351 # Cholesky decomposition 

2352 a1[index], info = potrf(a1[index], lower=True, overwrite_a=False, 

2353 clean=False) 

2354 if info > 0: 

2355 raise LinAlgError("%d-th leading minor not positive definite" 

2356 % info) 

2357 if info < 0: 

2358 raise ValueError('illegal value in %d-th argument of internal' 

2359 ' potrf' % -info) 

2360 # Inversion 

2361 a1[index], info = potri(a1[index], lower=True, overwrite_c=False) 

2362 if info > 0: 

2363 raise LinAlgError("the inverse could not be computed") 

2364 if info < 0: 

2365 raise ValueError('illegal value in %d-th argument of internal' 

2366 ' potri' % -info) 

2367 

2368 # Make symmetric (dpotri only fills in the lower triangle) 

2369 a1[index][triu_rows, triu_cols] = a1[index][triu_cols, triu_rows] 

2370 

2371 return a1 
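# Illustrative check of the helper above (hypothetical function, not part of
# the module): `_cho_inv_batch` inverts a stack of symmetric positive
# definite matrices in place and should agree with ``np.linalg.inv``.
def _cho_inv_batch_sketch():
    rng = np.random.RandomState(0)
    m = rng.normal(size=(3, 4, 4))
    spd = np.einsum('nij,nkj->nik', m, m) + 4 * np.eye(4)  # batch of SPD matrices
    assert np.allclose(_cho_inv_batch(spd.copy()), np.linalg.inv(spd))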

2372 

2373 

2374class invwishart_gen(wishart_gen): 

2375 r""" 

2376 An inverse Wishart random variable. 

2377 

2378 The `df` keyword specifies the degrees of freedom. The `scale` keyword 

2379 specifies the scale matrix, which must be symmetric and positive definite. 

2380 In this context, the scale matrix is often interpreted in terms of a 

2381 multivariate normal covariance matrix. 

2382 

2383 Methods 

2384 ------- 

2385 ``pdf(x, df, scale)`` 

2386 Probability density function. 

2387 ``logpdf(x, df, scale)`` 

2388 Log of the probability density function. 

2389 ``rvs(df, scale, size=1, random_state=None)`` 

2390 Draw random samples from an inverse Wishart distribution. 

2391 

2392 Parameters 

2393 ---------- 

2394 x : array_like 

2395 Quantiles, with the last axis of `x` denoting the components. 

2396 %(_doc_default_callparams)s 

2397 %(_doc_random_state)s 

2398 

2399 Alternatively, the object may be called (as a function) to fix the degrees 

2400 of freedom and scale parameters, returning a "frozen" inverse Wishart 

2401 random variable: 

2402 

2403 rv = invwishart(df=1, scale=1) 

2404 - Frozen object with the same methods but holding the given 

2405 degrees of freedom and scale fixed. 

2406 

2407 See Also 

2408 -------- 

2409 wishart 

2410 

2411 Notes 

2412 ----- 

2413 %(_doc_callparams_note)s 

2414 

2415 The scale matrix `scale` must be a symmetric positive definite 

2416 matrix. Singular matrices, including the symmetric positive semi-definite 

2417 case, are not supported. 

2418 

2419 The inverse Wishart distribution is often denoted 

2420 

2421 .. math:: 

2422 

2423 W_p^{-1}(\nu, \Psi) 

2424 

2425 where :math:`\nu` is the degrees of freedom and :math:`\Psi` is the 

2426 :math:`p \times p` scale matrix. 

2427 

2428 The probability density function for `invwishart` has support over positive 

2429 definite matrices :math:`S`; if :math:`S \sim W^{-1}_p(\nu, \Sigma)`, 

2430 then its PDF is given by: 

2431 

2432 .. math:: 

2433 

2434 f(S) = \frac{|\Sigma|^\frac{\nu}{2}}{2^{ \frac{\nu p}{2} } 

2435 |S|^{\frac{\nu + p + 1}{2}} \Gamma_p \left(\frac{\nu}{2} \right)} 

2436 \exp\left( -tr(\Sigma S^{-1}) / 2 \right) 

2437 

2438 If :math:`S \sim W_p^{-1}(\nu, \Psi)` (inverse Wishart) then 

2439 :math:`S^{-1} \sim W_p(\nu, \Psi^{-1})` (Wishart). 

2440 

2441 If the scale matrix is 1-dimensional and equal to one, then the inverse 

2442 Wishart distribution :math:`W_1^{-1}(\nu, 1)` collapses to the 

2443 inverse Gamma distribution with parameters shape = :math:`\frac{\nu}{2}` 

2444 and scale = :math:`\frac{1}{2}`. 

2445 

2446 .. versionadded:: 0.16.0 

2447 

2448 References 

2449 ---------- 

2450 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach", 

2451 Wiley, 1983. 

2452 .. [2] M.C. Jones, "Generating Inverse Wishart Matrices", Communications 

2453 in Statistics - Simulation and Computation, vol. 14.2, pp. 511-514, 

2454 1985. 

2455 

2456 Examples 

2457 -------- 

2458 >>> import matplotlib.pyplot as plt 

2459 >>> from scipy.stats import invwishart, invgamma 

2460 >>> x = np.linspace(0.01, 1, 100) 

2461 >>> iw = invwishart.pdf(x, df=6, scale=1) 

2462 >>> iw[:3] 

2463 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03]) 

2464 >>> ig = invgamma.pdf(x, 6/2., scale=1./2) 

2465 >>> ig[:3] 

2466 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03]) 

2467 >>> plt.plot(x, iw) 

2468 

2469 The input quantiles can be any shape of array, as long as the last 

2470 axis labels the components. 

2471 

2472 """ 

2473 

2474 def __init__(self, seed=None): 

2475 super(invwishart_gen, self).__init__(seed) 

2476 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params) 

2477 

2478 def __call__(self, df=None, scale=None, seed=None): 

2479 """ 

2480 Create a frozen inverse Wishart distribution. 

2481 

2482 See `invwishart_frozen` for more information. 

2483 

2484 """ 

2485 return invwishart_frozen(df, scale, seed) 

2486 

2487 def _logpdf(self, x, dim, df, scale, log_det_scale): 

2488 """ 

2489 Parameters 

2490 ---------- 

2491 x : ndarray 

2492 Points at which to evaluate the log of the probability 

2493 density function. 

2494 dim : int 

2495 Dimension of the scale matrix 

2496 df : int 

2497 Degrees of freedom 

2498 scale : ndarray 

2499 Scale matrix 

2500 log_det_scale : float 

2501 Logarithm of the determinant of the scale matrix 

2502 

2503 Notes 

2504 ----- 

2505 As this function does no argument checking, it should not be 

2506 called directly; use 'logpdf' instead. 

2507 

2508 """ 

2509 log_det_x = np.zeros(x.shape[-1]) 

2510 x_inv = np.copy(x).T 

2511 if dim > 1: 

2512 _cho_inv_batch(x_inv) # works in-place 

2513 else: 

2514 x_inv = 1./x_inv 

2515 tr_scale_x_inv = np.zeros(x.shape[-1]) 

2516 

2517 for i in range(x.shape[-1]): 

2518 C, lower = scipy.linalg.cho_factor(x[:, :, i], lower=True) 

2519 

2520 log_det_x[i] = 2 * np.sum(np.log(C.diagonal())) 

2521 

2522 tr_scale_x_inv[i] = np.dot(scale, x_inv[i]).trace() 

2523 

2524 # Log PDF 

2525 out = ((0.5 * df * log_det_scale - 0.5 * tr_scale_x_inv) - 

2526 (0.5 * df * dim * _LOG_2 + 0.5 * (df + dim + 1) * log_det_x) - 

2527 multigammaln(0.5*df, dim)) 

2528 

2529 return out 

2530 

2531 def logpdf(self, x, df, scale): 

2532 """ 

2533 Log of the inverse Wishart probability density function. 

2534 

2535 Parameters 

2536 ---------- 

2537 x : array_like 

2538 Quantiles, with the last axis of `x` denoting the components. 

2539 Each quantile must be a symmetric positive definite matrix. 

2540 %(_doc_default_callparams)s 

2541 

2542 Returns 

2543 ------- 

2544 pdf : ndarray 

2545 Log of the probability density function evaluated at `x` 

2546 

2547 Notes 

2548 ----- 

2549 %(_doc_callparams_note)s 

2550 

2551 """ 

2552 dim, df, scale = self._process_parameters(df, scale) 

2553 x = self._process_quantiles(x, dim) 

2554 _, log_det_scale = self._cholesky_logdet(scale) 

2555 out = self._logpdf(x, dim, df, scale, log_det_scale) 

2556 return _squeeze_output(out) 

2557 

2558 def pdf(self, x, df, scale): 

2559 """ 

2560 Inverse Wishart probability density function. 

2561 

2562 Parameters 

2563 ---------- 

2564 x : array_like 

2565 Quantiles, with the last axis of `x` denoting the components. 

2566 Each quantile must be a symmetric positive definite matrix. 

2567 

2568 %(_doc_default_callparams)s 

2569 

2570 Returns 

2571 ------- 

2572 pdf : ndarray 

2573 Probability density function evaluated at `x` 

2574 

2575 Notes 

2576 ----- 

2577 %(_doc_callparams_note)s 

2578 

2579 """ 

2580 return np.exp(self.logpdf(x, df, scale)) 

2581 

2582 def _mean(self, dim, df, scale): 

2583 """ 

2584 Parameters 

2585 ---------- 

2586 dim : int 

2587 Dimension of the scale matrix 

2588 %(_doc_default_callparams)s 

2589 

2590 Notes 

2591 ----- 

2592 As this function does no argument checking, it should not be 

2593 called directly; use 'mean' instead. 

2594 

2595 """ 

2596 if df > dim + 1: 

2597 out = scale / (df - dim - 1) 

2598 else: 

2599 out = None 

2600 return out 

2601 

2602 def mean(self, df, scale): 

2603 """ 

2604 Mean of the inverse Wishart distribution 

2605 

2606 Only valid if the degrees of freedom are greater than the dimension of 

2607 the scale matrix plus one. 

2608 

2609 Parameters 

2610 ---------- 

2611 %(_doc_default_callparams)s 

2612 

2613 Returns 

2614 ------- 

2615 mean : float or None 

2616 The mean of the distribution 

2617 

2618 """ 

2619 dim, df, scale = self._process_parameters(df, scale) 

2620 out = self._mean(dim, df, scale) 

2621 return _squeeze_output(out) if out is not None else out 

2622 

2623 def _mode(self, dim, df, scale): 

2624 """ 

2625 Parameters 

2626 ---------- 

2627 dim : int 

2628 Dimension of the scale matrix 

2629 %(_doc_default_callparams)s 

2630 

2631 Notes 

2632 ----- 

2633 As this function does no argument checking, it should not be 

2634 called directly; use 'mode' instead. 

2635 

2636 """ 

2637 return scale / (df + dim + 1) 

2638 

2639 def mode(self, df, scale): 

2640 """ 

2641 Mode of the inverse Wishart distribution 

2642 

2643 Parameters 

2644 ---------- 

2645 %(_doc_default_callparams)s 

2646 

2647 Returns 

2648 ------- 

2649 mode : float 

2650 The Mode of the distribution 

2651 

2652 """ 

2653 dim, df, scale = self._process_parameters(df, scale) 

2654 out = self._mode(dim, df, scale) 

2655 return _squeeze_output(out) 

2656 

2657 def _var(self, dim, df, scale): 

2658 """ 

2659 Parameters 

2660 ---------- 

2661 dim : int 

2662 Dimension of the scale matrix 

2663 %(_doc_default_callparams)s 

2664 

2665 Notes 

2666 ----- 

2667 As this function does no argument checking, it should not be 

2668 called directly; use 'var' instead. 

2669 

2670 """ 

2671 if df > dim + 3: 

2672 var = (df - dim + 1) * scale**2 

2673 diag = scale.diagonal() # 1 x dim array 

2674 var += (df - dim - 1) * np.outer(diag, diag) 

2675 var /= (df - dim) * (df - dim - 1)**2 * (df - dim - 3) 

2676 else: 

2677 var = None 

2678 return var 

2679 

2680 def var(self, df, scale): 

2681 """ 

2682 Variance of the inverse Wishart distribution 

2683 

2684 Only valid if the degrees of freedom are greater than the dimension of 

2685 the scale matrix plus three. 

2686 

2687 Parameters 

2688 ---------- 

2689 %(_doc_default_callparams)s 

2690 

2691 Returns 

2692 ------- 

2693 var : float 

2694 The variance of the distribution 

2695 """ 

2696 dim, df, scale = self._process_parameters(df, scale) 

2697 out = self._var(dim, df, scale) 

2698 return _squeeze_output(out) if out is not None else out 

2699 

2700 def _rvs(self, n, shape, dim, df, C, random_state): 

2701 """ 

2702 Parameters 

2703 ---------- 

2704 n : integer 

2705 Number of variates to generate 

2706 shape : iterable 

2707 Shape of the variates to generate 

2708 dim : int 

2709 Dimension of the scale matrix 

2710 df : int 

2711 Degrees of freedom 

2712 C : ndarray 

2713 Cholesky factorization of the scale matrix, lower triangular. 

2714 %(_doc_random_state)s 

2715 

2716 Notes 

2717 ----- 

2718 As this function does no argument checking, it should not be 

2719 called directly; use 'rvs' instead. 

2720 

2721 """ 

2722 random_state = self._get_random_state(random_state) 

2723 # Get random draws A such that A ~ W(df, I) 

2724 A = super(invwishart_gen, self)._standard_rvs(n, shape, dim, 

2725 df, random_state) 

2726 

2727 # Calculate SA = (CA)'^{-1} (CA)^{-1} ~ iW(df, scale) 

2728 eye = np.eye(dim) 

2729 trtrs = get_lapack_funcs(('trtrs'), (A,)) 

2730 

2731 for index in np.ndindex(A.shape[:-2]): 

2732 # Calculate CA 

2733 CA = np.dot(C, A[index]) 

2734 # Get (C A)^{-1} via triangular solver 

2735 if dim > 1: 

2736 CA, info = trtrs(CA, eye, lower=True) 

2737 if info > 0: 

2738 raise LinAlgError("Singular matrix.") 

2739 if info < 0: 

2740 raise ValueError('Illegal value in %d-th argument of' 

2741 ' internal trtrs' % -info) 

2742 else: 

2743 CA = 1. / CA 

2744 # Get SA 

2745 A[index] = np.dot(CA.T, CA) 

2746 

2747 return A 

2748 

2749 def rvs(self, df, scale, size=1, random_state=None): 

2750 """ 

2751 Draw random samples from an inverse Wishart distribution. 

2752 

2753 Parameters 

2754 ---------- 

2755 %(_doc_default_callparams)s 

2756 size : integer or iterable of integers, optional 

2757 Number of samples to draw (default 1). 

2758 %(_doc_random_state)s 

2759 

2760 Returns 

2761 ------- 

2762 rvs : ndarray 

2763 Random variates of shape (`size`) + (`dim`, `dim`), where `dim` is 

2764 the dimension of the scale matrix. 

2765 

2766 Notes 

2767 ----- 

2768 %(_doc_callparams_note)s 

2769 

2770 """ 

2771 n, shape = self._process_size(size) 

2772 dim, df, scale = self._process_parameters(df, scale) 

2773 

2774 # Invert the scale 

2775 eye = np.eye(dim) 

2776 L, lower = scipy.linalg.cho_factor(scale, lower=True) 

2777 inv_scale = scipy.linalg.cho_solve((L, lower), eye) 

2778 # Cholesky decomposition of inverted scale 

2779 C = scipy.linalg.cholesky(inv_scale, lower=True) 

2780 

2781 out = self._rvs(n, shape, dim, df, C, random_state) 

2782 

2783 return _squeeze_output(out) 

2784 

2785 def entropy(self): 

2786 # Need to find reference for inverse Wishart entropy 

2787 raise AttributeError 

2788 

2789 

2790invwishart = invwishart_gen() 
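# Illustrative sketch (hypothetical helper, not part of the class above)
# checking two statements from the `invwishart` docstring: the 1-d case with
# unit scale matches invgamma(df/2, scale=1/2), and the mean is
# scale / (df - dim - 1) whenever df > dim + 1.
def _invwishart_usage_sketch():
    from scipy.stats import invgamma

    x = np.linspace(0.01, 1, 5)
    assert np.allclose(invwishart.pdf(x, df=6, scale=1),
                       invgamma.pdf(x, 6/2., scale=1./2))

    scale = np.array([[2.0, 0.3], [0.3, 1.0]])
    assert np.allclose(invwishart.mean(df=6, scale=scale),
                       scale / (6 - 2 - 1))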

2791 

2792 

2793class invwishart_frozen(multi_rv_frozen): 

2794 def __init__(self, df, scale, seed=None): 

2795 """ 

2796 Create a frozen inverse Wishart distribution. 

2797 

2798 Parameters 

2799 ---------- 

2800 df : array_like 

2801 Degrees of freedom of the distribution 

2802 scale : array_like 

2803 Scale matrix of the distribution 

2804 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional 

2805 This parameter defines the object to use for drawing random 

2806 variates. 

2807 If `seed` is `None` the `~np.random.RandomState` singleton is used. 

2808 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

2809 with seed. 

2810 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

2811 then that object is used. 

2812 Default is None. 

2813 

2814 """ 

2815 self._dist = invwishart_gen(seed) 

2816 self.dim, self.df, self.scale = self._dist._process_parameters( 

2817 df, scale 

2818 ) 

2819 

2820 # Get the determinant via Cholesky factorization 

2821 C, lower = scipy.linalg.cho_factor(self.scale, lower=True) 

2822 self.log_det_scale = 2 * np.sum(np.log(C.diagonal())) 

2823 

2824 # Get the inverse using the Cholesky factorization 

2825 eye = np.eye(self.dim) 

2826 self.inv_scale = scipy.linalg.cho_solve((C, lower), eye) 

2827 

2828 # Get the Cholesky factorization of the inverse scale 

2829 self.C = scipy.linalg.cholesky(self.inv_scale, lower=True) 

2830 

2831 def logpdf(self, x): 

2832 x = self._dist._process_quantiles(x, self.dim) 

2833 out = self._dist._logpdf(x, self.dim, self.df, self.scale, 

2834 self.log_det_scale) 

2835 return _squeeze_output(out) 

2836 

2837 def pdf(self, x): 

2838 return np.exp(self.logpdf(x)) 

2839 

2840 def mean(self): 

2841 out = self._dist._mean(self.dim, self.df, self.scale) 

2842 return _squeeze_output(out) if out is not None else out 

2843 

2844 def mode(self): 

2845 out = self._dist._mode(self.dim, self.df, self.scale) 

2846 return _squeeze_output(out) 

2847 

2848 def var(self): 

2849 out = self._dist._var(self.dim, self.df, self.scale) 

2850 return _squeeze_output(out) if out is not None else out 

2851 

2852 def rvs(self, size=1, random_state=None): 

2853 n, shape = self._dist._process_size(size) 

2854 

2855 out = self._dist._rvs(n, shape, self.dim, self.df, 

2856 self.C, random_state) 

2857 

2858 return _squeeze_output(out) 

2859 

2860 def entropy(self): 

2861 # Need to find reference for inverse Wishart entropy 

2862 raise AttributeError 

2863 

2864 

2865# Set frozen generator docstrings from corresponding docstrings in 

2866# inverse Wishart and fill in default strings in class docstrings 

2867for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs']: 

2868 method = invwishart_gen.__dict__[name] 

2869 method_frozen = invwishart_frozen.__dict__[name] 

2870 method_frozen.__doc__ = doccer.docformat( 

2871 method.__doc__, wishart_docdict_noparams) 

2872 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params) 
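# Consistency sketch (hypothetical helper, illustration only) for the inverse
# relationship quoted in both docstrings: if S ~ W_p(df, Psi^{-1}) then
# S^{-1} ~ W_p^{-1}(df, Psi).  At the density level this reads
#     invwishart.logpdf(X, df, Psi)
#         == wishart.logpdf(inv(X), df, inv(Psi)) - (p + 1) * log|X|,
# the last term being the Jacobian of the map S -> S^{-1}.
def _wishart_invwishart_relation_sketch():
    df = 7
    psi = np.array([[2.0, 0.3], [0.3, 1.0]])
    x = np.array([[1.0, 0.2], [0.2, 0.5]])
    p = psi.shape[0]
    lhs = invwishart.logpdf(x, df, psi)
    rhs = (wishart.logpdf(np.linalg.inv(x), df, np.linalg.inv(psi))
           - (p + 1) * np.linalg.slogdet(x)[1])
    assert np.allclose(lhs, rhs)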

2873 

2874_multinomial_doc_default_callparams = """\ 

2875n : int 

2876 Number of trials 

2877p : array_like 

2878 Probability of a trial falling into each category; should sum to 1 

2879""" 

2880 

2881_multinomial_doc_callparams_note = \ 

2882"""`n` should be a positive integer. Each element of `p` should be in the 

2883interval :math:`[0,1]` and the elements should sum to 1. If they do not sum to 

28841, the last element of the `p` array is not used and is replaced with the 

2885remaining probability left over from the earlier elements. 

2886""" 

2887 

2888_multinomial_doc_frozen_callparams = "" 

2889 

2890_multinomial_doc_frozen_callparams_note = \ 

2891 """See class definition for a detailed description of parameters.""" 

2892 

2893multinomial_docdict_params = { 

2894 '_doc_default_callparams': _multinomial_doc_default_callparams, 

2895 '_doc_callparams_note': _multinomial_doc_callparams_note, 

2896 '_doc_random_state': _doc_random_state 

2897} 

2898 

2899multinomial_docdict_noparams = { 

2900 '_doc_default_callparams': _multinomial_doc_frozen_callparams, 

2901 '_doc_callparams_note': _multinomial_doc_frozen_callparams_note, 

2902 '_doc_random_state': _doc_random_state 

2903} 

2904 

2905 

2906class multinomial_gen(multi_rv_generic): 

2907 r""" 

2908 A multinomial random variable. 

2909 

2910 Methods 

2911 ------- 

2912 ``pmf(x, n, p)`` 

2913 Probability mass function. 

2914 ``logpmf(x, n, p)`` 

2915 Log of the probability mass function. 

2916 ``rvs(n, p, size=1, random_state=None)`` 

2917 Draw random samples from a multinomial distribution. 

2918 ``entropy(n, p)`` 

2919 Compute the entropy of the multinomial distribution. 

2920 ``cov(n, p)`` 

2921 Compute the covariance matrix of the multinomial distribution. 

2922 

2923 Parameters 

2924 ---------- 

2925 x : array_like 

2926 Quantiles, with the last axis of `x` denoting the components. 

2927 %(_doc_default_callparams)s 

2928 %(_doc_random_state)s 

2929 

2930 Notes 

2931 ----- 

2932 %(_doc_callparams_note)s 

2933 

2934 Alternatively, the object may be called (as a function) to fix the `n` and 

2935 `p` parameters, returning a "frozen" multinomial random variable. 

2936 

2937 The probability mass function for `multinomial` is 

2938 

2939 .. math:: 

2940 

2941 f(x) = \frac{n!}{x_1! \cdots x_k!} p_1^{x_1} \cdots p_k^{x_k}, 

2942 

2943 supported on :math:`x=(x_1, \ldots, x_k)` where each :math:`x_i` is a 

2944 nonnegative integer and their sum is :math:`n`. 

2945 

2946 .. versionadded:: 0.19.0 

2947 

2948 Examples 

2949 -------- 

2950 

2951 >>> from scipy.stats import multinomial 

2952 >>> rv = multinomial(8, [0.3, 0.2, 0.5]) 

2953 >>> rv.pmf([1, 3, 4]) 

2954 0.042000000000000072 

2955 

2956 The multinomial distribution for :math:`k=2` is identical to the 

2957 corresponding binomial distribution (tiny numerical differences 

2958 notwithstanding): 

2959 

2960 >>> from scipy.stats import binom 

2961 >>> multinomial.pmf([3, 4], n=7, p=[0.4, 0.6]) 

2962 0.29030399999999973 

2963 >>> binom.pmf(3, 7, 0.4) 

2964 0.29030400000000012 

2965 

2966 The functions ``pmf``, ``logpmf``, ``entropy``, and ``cov`` support 

2967 broadcasting, under the convention that the vector parameters (``x`` and 

2968 ``p``) are interpreted as if each row along the last axis is a single 

2969 object. For instance: 

2970 

2971 >>> multinomial.pmf([[3, 4], [3, 5]], n=[7, 8], p=[.3, .7]) 

2972 array([0.2268945, 0.25412184]) 

2973 

2974 Here, ``x.shape == (2, 2)``, ``n.shape == (2,)``, and ``p.shape == (2,)``, 

2975 but following the rules mentioned above they behave as if the rows 

2976 ``[3, 4]`` and ``[3, 5]`` in ``x`` and ``[.3, .7]`` in ``p`` were a single 

2977 object, and as if we had ``x.shape = (2,)``, ``n.shape = (2,)``, and 

2978 ``p.shape = ()``. To obtain the individual elements without broadcasting, 

2979 we would do this: 

2980 

2981 >>> multinomial.pmf([3, 4], n=7, p=[.3, .7]) 

2982 0.2268945 

2983 >>> multinomial.pmf([3, 5], 8, p=[.3, .7]) 

2984 0.25412184 

2985 

2986 This broadcasting also works for ``cov``, where the output objects are 

2987 square matrices of size ``p.shape[-1]``. For example: 

2988 

2989 >>> multinomial.cov([4, 5], [[.3, .7], [.4, .6]]) 

2990 array([[[ 0.84, -0.84], 

2991 [-0.84, 0.84]], 

2992 [[ 1.2 , -1.2 ], 

2993 [-1.2 , 1.2 ]]]) 

2994 

2995 In this example, ``n.shape == (2,)`` and ``p.shape == (2, 2)``, and 

2996 following the rules above, these broadcast as if ``p.shape == (2,)``. 

2997 Thus the result should also be of shape ``(2,)``, but since each output is 

2998 a :math:`2 \times 2` matrix, the result in fact has shape ``(2, 2, 2)``, 

2999 where ``result[0]`` is equal to ``multinomial.cov(n=4, p=[.3, .7])`` and 

3000 ``result[1]`` is equal to ``multinomial.cov(n=5, p=[.4, .6])``. 

3001 

3002 See also 

3003 -------- 

3004 scipy.stats.binom : The binomial distribution. 

3005 numpy.random.Generator.multinomial : Sampling from the multinomial distribution. 

3006 """ # noqa: E501 

3007 

3008 def __init__(self, seed=None): 

3009 super(multinomial_gen, self).__init__(seed) 

3010 self.__doc__ = \ 

3011 doccer.docformat(self.__doc__, multinomial_docdict_params) 

3012 

3013 def __call__(self, n, p, seed=None): 

3014 """ 

3015 Create a frozen multinomial distribution. 

3016 

3017 See `multinomial_frozen` for more information. 

3018 """ 

3019 return multinomial_frozen(n, p, seed) 

3020 

3021 def _process_parameters(self, n, p): 

3022 """ 

3023 Return: n_, p_, npcond. 

3024 

3025 n_ and p_ are arrays of the correct shape; npcond is a boolean array 

3026 flagging values out of the domain. 

3027 """ 

3028 p = np.array(p, dtype=np.float64, copy=True) 

3029 p[..., -1] = 1. - p[..., :-1].sum(axis=-1) 

3030 

3031 # true for bad p 

3032 pcond = np.any(p < 0, axis=-1) 

3033 pcond |= np.any(p > 1, axis=-1) 

3034 

3035 n = np.array(n, dtype=np.int, copy=True) 

3036 

3037 # true for bad n 

3038 ncond = n <= 0 

3039 

3040 return n, p, ncond | pcond 

3041 

3042 def _process_quantiles(self, x, n, p): 

3043 """ 

3044 Return: x_, xcond. 

3045 

3046 x_ is an int array; xcond is a boolean array flagging values out of the 

3047 domain. 

3048 """ 

3049 xx = np.asarray(x, dtype=np.int) 

3050 

3051 if xx.ndim == 0: 

3052 raise ValueError("x must be an array.") 

3053 

3054 if xx.size != 0 and not xx.shape[-1] == p.shape[-1]: 

3055 raise ValueError("Size of each quantile should be size of p: " 

3056 "received %d, but expected %d." % 

3057 (xx.shape[-1], p.shape[-1])) 

3058 

3059 # true for x out of the domain 

3060 cond = np.any(xx != x, axis=-1) 

3061 cond |= np.any(xx < 0, axis=-1) 

3062 cond = cond | (np.sum(xx, axis=-1) != n) 

3063 

3064 return xx, cond 

3065 

3066 def _checkresult(self, result, cond, bad_value): 

3067 result = np.asarray(result) 

3068 

3069 if cond.ndim != 0: 

3070 result[cond] = bad_value 

3071 elif cond: 

3072 if result.ndim == 0: 

3073 return bad_value 

3074 result[...] = bad_value 

3075 return result 

3076 

3077 def _logpmf(self, x, n, p): 

3078 return gammaln(n+1) + np.sum(xlogy(x, p) - gammaln(x+1), axis=-1) 

3079 

3080 def logpmf(self, x, n, p): 

3081 """ 

3082 Log of the Multinomial probability mass function. 

3083 

3084 Parameters 

3085 ---------- 

3086 x : array_like 

3087 Quantiles, with the last axis of `x` denoting the components. 

3088 %(_doc_default_callparams)s 

3089 

3090 Returns 

3091 ------- 

3092 logpmf : ndarray or scalar 

3093 Log of the probability mass function evaluated at `x` 

3094 

3095 Notes 

3096 ----- 

3097 %(_doc_callparams_note)s 

3098 """ 

3099 n, p, npcond = self._process_parameters(n, p) 

3100 x, xcond = self._process_quantiles(x, n, p) 

3101 

3102 result = self._logpmf(x, n, p) 

3103 

3104 # replace values for which x was out of the domain; broadcast 

3105 # xcond to the right shape 

3106 xcond_ = xcond | np.zeros(npcond.shape, dtype=np.bool_) 

3107 result = self._checkresult(result, xcond_, np.NINF) 

3108 

3109 # replace values bad for n or p; broadcast npcond to the right shape 

3110 npcond_ = npcond | np.zeros(xcond.shape, dtype=np.bool_) 

3111 return self._checkresult(result, npcond_, np.NAN) 

3112 

3113 def pmf(self, x, n, p): 

3114 """ 

3115 Multinomial probability mass function. 

3116 

3117 Parameters 

3118 ---------- 

3119 x : array_like 

3120 Quantiles, with the last axis of `x` denoting the components. 

3121 %(_doc_default_callparams)s 

3122 

3123 Returns 

3124 ------- 

3125 pmf : ndarray or scalar 

3126 Probability density function evaluated at `x` 

3127 

3128 Notes 

3129 ----- 

3130 %(_doc_callparams_note)s 

3131 """ 

3132 return np.exp(self.logpmf(x, n, p)) 

3133 

3134 def mean(self, n, p): 

3135 """ 

3136 Mean of the Multinomial distribution 

3137 

3138 Parameters 

3139 ---------- 

3140 %(_doc_default_callparams)s 

3141 

3142 Returns 

3143 ------- 

3144 mean : float 

3145 The mean of the distribution 

3146 """ 

3147 n, p, npcond = self._process_parameters(n, p) 

3148 result = n[..., np.newaxis]*p 

3149 return self._checkresult(result, npcond, np.NAN) 

3150 

3151 def cov(self, n, p): 

3152 """ 

3153 Covariance matrix of the multinomial distribution. 

3154 

3155 Parameters 

3156 ---------- 

3157 %(_doc_default_callparams)s 

3158 

3159 Returns 

3160 ------- 

3161 cov : ndarray 

3162 The covariance matrix of the distribution 

3163 """ 

3164 n, p, npcond = self._process_parameters(n, p) 

3165 

3166 nn = n[..., np.newaxis, np.newaxis] 

3167 result = nn * np.einsum('...j,...k->...jk', -p, p) 

3168 

3169 # change the diagonal 

3170 for i in range(p.shape[-1]): 

3171 result[..., i, i] += n*p[..., i] 

3172 

3173 return self._checkresult(result, npcond, np.nan) 

3174 

3175 def entropy(self, n, p): 

3176 r""" 

3177 Compute the entropy of the multinomial distribution. 

3178 

3179 The entropy is computed using this expression: 

3180 

3181 .. math:: 

3182 

3183 f(x) = - \log n! - n\sum_{i=1}^k p_i \log p_i + 

3184 \sum_{i=1}^k \sum_{x=0}^n \binom n x p_i^x(1-p_i)^{n-x} \log x! 

3185 

3186 Parameters 

3187 ---------- 

3188 %(_doc_default_callparams)s 

3189 

3190 Returns 

3191 ------- 

3192 h : scalar 

3193 Entropy of the Multinomial distribution 

3194 

3195 Notes 

3196 ----- 

3197 %(_doc_callparams_note)s 

3198 """ 

3199 n, p, npcond = self._process_parameters(n, p) 

3200 

3201 x = np.r_[1:np.max(n)+1] 

3202 

3203 term1 = n*np.sum(entr(p), axis=-1) 

3204 term1 -= gammaln(n+1) 

3205 

3206 n = n[..., np.newaxis] 

3207 new_axes_needed = max(p.ndim, n.ndim) - x.ndim + 1 

3208 x.shape += (1,)*new_axes_needed 

3209 

3210 term2 = np.sum(binom.pmf(x, n, p)*gammaln(x+1), 

3211 axis=(-1, -1-new_axes_needed)) 

3212 

3213 return self._checkresult(term1 + term2, npcond, np.nan) 

3214 

3215 def rvs(self, n, p, size=None, random_state=None): 

3216 """ 

3217 Draw random samples from a Multinomial distribution. 

3218 

3219 Parameters 

3220 ---------- 

3221 %(_doc_default_callparams)s 

3222 size : integer or iterable of integers, optional 

3223 Number of samples to draw (default 1). 

3224 %(_doc_random_state)s 

3225 

3226 Returns 

3227 ------- 

3228 rvs : ndarray or scalar 

3229 Random variates of shape (`size`, `len(p)`) 

3230 

3231 Notes 

3232 ----- 

3233 %(_doc_callparams_note)s 

3234 """ 

3235 n, p, npcond = self._process_parameters(n, p) 

3236 random_state = self._get_random_state(random_state) 

3237 return random_state.multinomial(n, p, size) 

3238 

3239 

3240multinomial = multinomial_gen() 
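# Illustrative sketch (hypothetical helper): the closed form used by
# `_logpmf` above, log n! - sum_i log x_i! + sum_i x_i log p_i, reproduces
# `multinomial.logpmf` for an in-domain count vector.
def _multinomial_logpmf_sketch():
    x = np.array([1, 3, 4])
    n, p = 8, np.array([0.3, 0.2, 0.5])
    by_hand = gammaln(n + 1) - gammaln(x + 1).sum() + xlogy(x, p).sum()
    assert np.allclose(multinomial.logpmf(x, n, p), by_hand)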

3241 

3242 

3243class multinomial_frozen(multi_rv_frozen): 

3244 r""" 

3245 Create a frozen Multinomial distribution. 

3246 

3247 Parameters 

3248 ---------- 

3249 n : int 

3250 number of trials 

3251 p: array_like 

3252 probability of a trial falling into each category; should sum to 1 

3253 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional 

3254 This parameter defines the object to use for drawing random variates. 

3255 If `seed` is `None` the `~np.random.RandomState` singleton is used. 

3256 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

3257 with seed. 

3258 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

3259 then that object is used. 

3260 Default is None. 

3261 """ 

3262 def __init__(self, n, p, seed=None): 

3263 self._dist = multinomial_gen(seed) 

3264 self.n, self.p, self.npcond = self._dist._process_parameters(n, p) 

3265 

3266 # monkey patch self._dist 

3267 def _process_parameters(n, p): 

3268 return self.n, self.p, self.npcond 

3269 

3270 self._dist._process_parameters = _process_parameters 

3271 

3272 def logpmf(self, x): 

3273 return self._dist.logpmf(x, self.n, self.p) 

3274 

3275 def pmf(self, x): 

3276 return self._dist.pmf(x, self.n, self.p) 

3277 

3278 def mean(self): 

3279 return self._dist.mean(self.n, self.p) 

3280 

3281 def cov(self): 

3282 return self._dist.cov(self.n, self.p) 

3283 

3284 def entropy(self): 

3285 return self._dist.entropy(self.n, self.p) 

3286 

3287 def rvs(self, size=1, random_state=None): 

3288 return self._dist.rvs(self.n, self.p, size, random_state) 

3289 

3290 

3291# Set frozen generator docstrings from corresponding docstrings in 

3292# multinomial and fill in default strings in class docstrings 

3293for name in ['logpmf', 'pmf', 'mean', 'cov', 'rvs']: 

3294 method = multinomial_gen.__dict__[name] 

3295 method_frozen = multinomial_frozen.__dict__[name] 

3296 method_frozen.__doc__ = doccer.docformat( 

3297 method.__doc__, multinomial_docdict_noparams) 

3298 method.__doc__ = doccer.docformat(method.__doc__, 

3299 multinomial_docdict_params) 
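# A short sketch (hypothetical helper, illustration only): the covariance
# returned by `cov` above is n * (diag(p) - p p^T).
def _multinomial_cov_sketch():
    n, p = 8, np.array([0.3, 0.2, 0.5])
    expected = n * (np.diag(p) - np.outer(p, p))
    assert np.allclose(multinomial.cov(n, p), expected)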

3300 

3301 

3302class special_ortho_group_gen(multi_rv_generic): 

3303 r""" 

3304 A matrix-valued SO(N) random variable. 

3305 

3306 Return a random rotation matrix, drawn from the Haar distribution 

3307 (the only uniform distribution on SO(n)). 

3308 

3309 The `dim` keyword specifies the dimension N. 

3310 

3311 Methods 

3312 ------- 

3313 ``rvs(dim=None, size=1, random_state=None)`` 

3314 Draw random samples from SO(N). 

3315 

3316 Parameters 

3317 ---------- 

3318 dim : scalar 

3319 Dimension of matrices 

3320 

3321 Notes 

3322 ----- 

3323 This class is wrapping the random_rot code from the MDP Toolkit, 

3324 https://github.com/mdp-toolkit/mdp-toolkit 

3325 

3326 Return a random rotation matrix, drawn from the Haar distribution 

3327 (the only uniform distribution on SO(n)). 

3328 The algorithm is described in the paper 

3329 Stewart, G.W., "The efficient generation of random orthogonal 

3330 matrices with an application to condition estimators", SIAM Journal 

3331 on Numerical Analysis, 17(3), pp. 403-409, 1980. 

3332 For more information see 

3333 https://en.wikipedia.org/wiki/Orthogonal_matrix#Randomization 

3334 

3335 See also the similar `ortho_group`. 

3336 

3337 Examples 

3338 -------- 

3339 >>> from scipy.stats import special_ortho_group 

3340 >>> x = special_ortho_group.rvs(3) 

3341 

3342 >>> np.dot(x, x.T) 

3343 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16], 

3344 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16], 

3345 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]]) 

3346 

3347 >>> import scipy.linalg 

3348 >>> scipy.linalg.det(x) 

3349 1.0 

3350 

3351 This generates one random matrix from SO(3). It is orthogonal and 

3352 has a determinant of 1. 

3353 

3354 """ 

3355 

3356 def __init__(self, seed=None): 

3357 super(special_ortho_group_gen, self).__init__(seed) 

3358 self.__doc__ = doccer.docformat(self.__doc__) 

3359 

3360 def __call__(self, dim=None, seed=None): 

3361 """ 

3362 Create a frozen SO(N) distribution. 

3363 

3364 See `special_ortho_group_frozen` for more information. 

3365 

3366 """ 

3367 return special_ortho_group_frozen(dim, seed=seed) 

3368 

3369 def _process_parameters(self, dim): 

3370 """ 

3371 Dimension N must be specified; it cannot be inferred. 

3372 """ 

3373 

3374 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim): 

3375 raise ValueError("Dimension of rotation must be specified," 

3376 " and must be a scalar greater than 1.") 

3377 

3378 return dim 

3379 

3380 def rvs(self, dim, size=1, random_state=None): 

3381 """ 

3382 Draw random samples from SO(N). 

3383 

3384 Parameters 

3385 ---------- 

3386 dim : integer 

3387 Dimension of rotation space (N). 

3388 size : integer, optional 

3389 Number of samples to draw (default 1). 

3390 

3391 Returns 

3392 ------- 

3393 rvs : ndarray or scalar 

3394 Random size N-dimensional matrices, dimension (size, dim, dim) 

3395 

3396 """ 

3397 random_state = self._get_random_state(random_state) 

3398 

3399 size = int(size) 

3400 if size > 1: 

3401 return np.array([self.rvs(dim, size=1, random_state=random_state) 

3402 for i in range(size)]) 

3403 

3404 dim = self._process_parameters(dim) 

3405 

3406 H = np.eye(dim) 

3407 D = np.empty((dim,)) 

3408 for n in range(dim-1): 

3409 x = random_state.normal(size=(dim-n,)) 

3410 norm2 = np.dot(x, x) 

3411 x0 = x[0].item() 

3412 D[n] = np.sign(x[0]) if x[0] != 0 else 1 

3413 x[0] += D[n]*np.sqrt(norm2) 

3414 x /= np.sqrt((norm2 - x0**2 + x[0]**2) / 2.) 

3415 # Householder transformation 

3416 H[:, n:] -= np.outer(np.dot(H[:, n:], x), x) 

3417 D[-1] = (-1)**(dim-1)*D[:-1].prod() 

3418 # Equivalent to np.dot(np.diag(D), H) but faster, apparently 

3419 H = (D*H.T).T 

3420 return H 

3421 

3422 

3423special_ortho_group = special_ortho_group_gen() 
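# Illustrative sketch (hypothetical helper): a single SO(N) draw from the
# generator above is orthogonal and has determinant +1.
def _special_ortho_group_sketch():
    x = special_ortho_group.rvs(3, random_state=123)
    assert np.allclose(x.dot(x.T), np.eye(3))
    assert np.allclose(np.linalg.det(x), 1.0)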

3424 

3425 

3426class special_ortho_group_frozen(multi_rv_frozen): 

3427 def __init__(self, dim=None, seed=None): 

3428 """ 

3429 Create a frozen SO(N) distribution. 

3430 

3431 Parameters 

3432 ---------- 

3433 dim : scalar 

3434 Dimension of matrices 

3435 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional 

3436 This parameter defines the object to use for drawing random 

3437 variates. 

3438 If `seed` is `None` the `~np.random.RandomState` singleton is used. 

3439 If `seed` is an int, a new ``RandomState`` instance is used, seeded 

3440 with seed. 

3441 If `seed` is already a ``RandomState`` or ``Generator`` instance, 

3442 then that object is used. 

3443 Default is None. 

3444 

3445 Examples 

3446 -------- 

3447 >>> from scipy.stats import special_ortho_group 

3448 >>> g = special_ortho_group(5) 

3449 >>> x = g.rvs() 

3450 

3451 """ 

3452 self._dist = special_ortho_group_gen(seed) 

3453 self.dim = self._dist._process_parameters(dim) 

3454 

3455 def rvs(self, size=1, random_state=None): 

3456 return self._dist.rvs(self.dim, size, random_state) 
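# Usage sketch for the frozen wrapper above (hypothetical helper): fix the
# dimension once and draw repeatedly without respecifying it.
def _frozen_special_ortho_group_sketch():
    g = special_ortho_group(5)
    draws = [g.rvs() for _ in range(3)]
    assert all(np.allclose(r.dot(r.T), np.eye(5)) for r in draws)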

3457 

3458 

3459class ortho_group_gen(multi_rv_generic): 

3460 r""" 

3461 A matrix-valued O(N) random variable. 

3462 

3463 Return a random orthogonal matrix, drawn from the O(N) Haar 

3464 distribution (the only uniform distribution on O(N)). 

3465 

3466 The `dim` keyword specifies the dimension N. 

3467 

3468 Methods 

3469 ------- 

3470 ``rvs(dim=None, size=1, random_state=None)`` 

3471 Draw random samples from O(N). 

3472 

3473 Parameters 

3474 ---------- 

3475 dim : scalar 

3476 Dimension of matrices 

3477 

3478 Notes 

3479 ----- 

3480 This class is closely related to `special_ortho_group`. 

3481 

3482 Some care is taken to avoid numerical error, as per the paper by Mezzadri. 

3483 

3484 References 

3485 ---------- 

3486 .. [1] F. Mezzadri, "How to generate random matrices from the classical 

3487 compact groups", :arXiv:`math-ph/0609050v2`. 

3488 

3489 Examples 

3490 -------- 

3491 >>> from scipy.stats import ortho_group 

3492 >>> x = ortho_group.rvs(3) 

3493 

3494 >>> np.dot(x, x.T) 

3495 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16], 

3496 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16], 

3497 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]]) 

3498 

3499 >>> import scipy.linalg 

3500 >>> np.fabs(scipy.linalg.det(x)) 

3501 1.0 

3502 

3503 This generates one random matrix from O(3). It is orthogonal and 

3504 has a determinant of +1 or -1. 

3505 

3506 """ 

3507 

3508 def __init__(self, seed=None): 

3509 super(ortho_group_gen, self).__init__(seed) 

3510 self.__doc__ = doccer.docformat(self.__doc__) 

3511 

3512 def _process_parameters(self, dim): 

3513 """ 

3514 Dimension N must be specified; it cannot be inferred. 

3515 """ 

3516 

3517 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim): 

3518 raise ValueError("Dimension of rotation must be specified," 

3519 " and must be a scalar greater than 1.") 

3520 

3521 return dim 

3522 

3523 def rvs(self, dim, size=1, random_state=None): 

3524 """ 

3525 Draw random samples from O(N). 

3526 

3527 Parameters 

3528 ---------- 

3529 dim : integer 

3530 Dimension of rotation space (N). 

3531 size : integer, optional 

3532 Number of samples to draw (default 1). 

3533 

3534 Returns 

3535 ------- 

3536 rvs : ndarray

3537 Random orthogonal matrices; shape (dim, dim) if size == 1, else (size, dim, dim)

3538 

3539 """ 

3540 random_state = self._get_random_state(random_state) 

3541 

3542 size = int(size) 

3543 if size > 1: 

3544 return np.array([self.rvs(dim, size=1, random_state=random_state) 

3545 for i in range(size)]) 

3546 

3547 dim = self._process_parameters(dim) 

3548 

3549 H = np.eye(dim) 

3550 for n in range(dim): 

3551 x = random_state.normal(size=(dim-n,)) 

3552 norm2 = np.dot(x, x) 

3553 x0 = x[0].item() 

3554 # random sign, 50/50, but chosen carefully to avoid roundoff error 

3555 D = np.sign(x[0]) if x[0] != 0 else 1 

3556 x[0] += D * np.sqrt(norm2) 

3557 x /= np.sqrt((norm2 - x0**2 + x[0]**2) / 2.) 
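# x now has squared norm 2, so (I - outer(x, x)) applied below is exactly
# the Householder reflection mapping the sampled Gaussian vector onto a
# multiple of the first basis vector.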

3558 # Householder transformation 

3559 H[:, n:] = -D * (H[:, n:] - np.outer(np.dot(H[:, n:], x), x)) 

3560 return H 

3561 

3562 

3563ortho_group = ortho_group_gen() 
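# The rvs method above accumulates Householder reflections, as recommended by
# Mezzadri [1]. A shorter, mathematically equivalent construction -- shown here
# only as a sketch, and *not* what the class uses internally -- is to
# QR-decompose a Gaussian matrix and absorb the signs of R's diagonal into Q:
def _haar_orthogonal_via_qr(dim, random_state=None):
    random_state = check_random_state(random_state)
    z = random_state.normal(size=(dim, dim))
    q, r = scipy.linalg.qr(z)
    d = np.sign(np.diag(r))
    d[d == 0] = 1            # guard against an exact zero on the diagonal
    return q * d             # rescale column j of q by d[j]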

3564 

3565 

3566class random_correlation_gen(multi_rv_generic): 

3567 r""" 

3568 A random correlation matrix. 

3569 

3570 Return a random correlation matrix, given a vector of eigenvalues. 

3571 

3572 The `eigs` keyword specifies the eigenvalues of the correlation matrix, 

3573 and implies the dimension. 

3574 

3575 Methods 

3576 ------- 

3577 ``rvs(eigs=None, random_state=None)`` 

3578 Draw random correlation matrices, all with eigenvalues eigs. 

3579 

3580 Parameters 

3581 ---------- 

3582 eigs : 1d ndarray 

3583 Eigenvalues of correlation matrix. 

3584 

3585 Notes 

3586 -----

3587 

3588 Generates a random correlation matrix following a numerically stable 

3589 algorithm spelled out by Davies & Higham. This algorithm uses a single O(N) 

3590 similarity transformation to construct a symmetric positive semi-definite 

3591 matrix, and applies a series of Givens rotations to scale it to have ones 

3592 on the diagonal. 

3593 

3594 References 

3595 ---------- 

3596 

3597 .. [1] Davies, Philip I; Higham, Nicholas J; "Numerically stable generation 

3598 of correlation matrices and their factors", BIT 2000, Vol. 40, 

3599 No. 4, pp. 640-651

3600 

3601 Examples 

3602 -------- 

3603 >>> from scipy.stats import random_correlation 

3604 >>> np.random.seed(514) 

3605 >>> x = random_correlation.rvs((.5, .8, 1.2, 1.5)) 

3606 >>> x 

3607 array([[ 1. , -0.20387311, 0.18366501, -0.04953711], 

3608 [-0.20387311, 1. , -0.24351129, 0.06703474], 

3609 [ 0.18366501, -0.24351129, 1. , 0.38530195], 

3610 [-0.04953711, 0.06703474, 0.38530195, 1. ]]) 

3611 >>> import scipy.linalg 

3612 >>> e, v = scipy.linalg.eigh(x) 

3613 >>> e 

3614 array([ 0.5, 0.8, 1.2, 1.5]) 

3615 

3616 """ 

3617 

3618 def __init__(self, seed=None): 

3619 super(random_correlation_gen, self).__init__(seed) 

3620 self.__doc__ = doccer.docformat(self.__doc__) 

3621 

3622 def _process_parameters(self, eigs, tol): 

3623 eigs = np.asarray(eigs, dtype=float) 

3624 dim = eigs.size 

3625 

3626 if eigs.ndim != 1 or eigs.shape[0] != dim or dim <= 1: 

3627 raise ValueError("Array 'eigs' must be a vector of length " 

3628 "greater than 1.") 

3629 

3630 if np.fabs(np.sum(eigs) - dim) > tol: 

3631 raise ValueError("Sum of eigenvalues must equal dimensionality.") 

3632 

3633 for x in eigs: 

3634 if x < -tol: 

3635 raise ValueError("All eigenvalues must be non-negative.") 

3636 

3637 return dim, eigs 

3638 

3639 def _givens_to_1(self, aii, ajj, aij): 

3640 """Computes a 2x2 Givens matrix to put 1's on the diagonal. 

3641 

3642 The input matrix is a 2x2 symmetric matrix M = [ aii aij ; aij ajj ]. 

3643 

3644 The output matrix g is a 2x2 Givens rotation matrix of the form

3645 [ c s ; -s c ]; the elements c and s are returned. 

3646 

3647 Applying the output matrix to the input matrix (as b=g.T M g) 

3648 results in a matrix with bii=1, provided tr(M) - det(M) >= 1 

3649 and floating point issues do not occur. Otherwise, some other 

3650 valid rotation is returned. When tr(M)==2, also bjj=1. 

3651 

3652 """ 

3653 aiid = aii - 1. 

3654 ajjd = ajj - 1. 

3655 

3656 if ajjd == 0: 

3657 # ajj==1, so swap aii and ajj to avoid division by zero 

3658 return 0., 1. 

3659 

3660 dd = math.sqrt(max(aij**2 - aiid*ajjd, 0)) 

3661 

3662 # t is chosen to avoid cancellation [1]

3663 t = (aij + math.copysign(dd, aij)) / ajjd 

3664 c = 1. / math.sqrt(1. + t*t) 

3665 if c == 0: 

3666 # Underflow 

3667 s = 1.0 

3668 else: 

3669 s = c*t 

3670 return c, s 

3671 

3672 def _to_corr(self, m): 

3673 """ 

3674 Given a psd matrix m, rotate to put ones on the diagonal, turning it

3675 into a correlation matrix. This also requires that the trace equal

3676 the dimensionality. Note: modifies the input matrix in place.

3677 """ 

3678 # Check requirements for in-place Givens 

3679 if not (m.flags.c_contiguous and m.dtype == np.float64 and 

3680 m.shape[0] == m.shape[1]): 

3681 raise ValueError("Matrix must be a square, C-contiguous float64 array.")

3682 

3683 d = m.shape[0] 

3684 for i in range(d-1): 

3685 if m[i, i] == 1: 

3686 continue 

3687 elif m[i, i] > 1: 

3688 for j in range(i+1, d): 

3689 if m[j, j] < 1: 

3690 break 

3691 else: 

3692 for j in range(i+1, d): 

3693 if m[j, j] > 1: 

3694 break 

3695 

3696 c, s = self._givens_to_1(m[i, i], m[j, j], m[i, j]) 

3697 

3698 # Use BLAS to apply Givens rotations in-place. Equivalent to: 

3699 # g = np.eye(d) 

3700 # g[i, i] = g[j,j] = c 

3701 # g[j, i] = -s; g[i, j] = s 

3702 # m = np.dot(g.T, np.dot(m, g)) 

3703 mv = m.ravel() 

3704 drot(mv, mv, c, -s, n=d, 

3705 offx=i*d, incx=1, offy=j*d, incy=1, 

3706 overwrite_x=True, overwrite_y=True) 

3707 drot(mv, mv, c, -s, n=d, 

3708 offx=i, incx=d, offy=j, incy=d, 

3709 overwrite_x=True, overwrite_y=True) 

3710 

3711 return m 

3712 

3713 def rvs(self, eigs, random_state=None, tol=1e-13, diag_tol=1e-7): 

3714 """ 

3715 Draw random correlation matrices 

3716 

3717 Parameters 

3718 ---------- 

3719 eigs : 1d ndarray 

3720 Eigenvalues of correlation matrix 

3721 tol : float, optional 

3722 Tolerance for input parameter checks 

3723 diag_tol : float, optional 

3724 Tolerance for deviation of the diagonal of the resulting 

3725 matrix. Default: 1e-7 

3726 

3727 Raises 

3728 ------ 

3729 RuntimeError 

3730 Floating point error prevented generating a valid correlation 

3731 matrix. 

3732 

3733 Returns 

3734 ------- 

3735 rvs : ndarray

3736 Random correlation matrix of shape (dim, dim), having

3737 eigenvalues eigs.

3738 

3739 """ 

3740 dim, eigs = self._process_parameters(eigs, tol=tol) 

3741 

3742 random_state = self._get_random_state(random_state) 

3743 

3744 m = ortho_group.rvs(dim, random_state=random_state) 

3745 m = np.dot(np.dot(m, np.diag(eigs)), m.T) # Set the trace of m 

3746 m = self._to_corr(m) # Carefully rotate to unit diagonal 

3747 

3748 # Check diagonal 

3749 if abs(m.diagonal() - 1).max() > diag_tol: 

3750 raise RuntimeError("Failed to generate a valid correlation matrix") 

3751 

3752 return m 

3753 

3754 

3755random_correlation = random_correlation_gen() 
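# For readers of _to_corr above: the two in-place BLAS drot calls implement
# the two-sided update m <- g.T @ m @ g with a 2x2 Givens rotation embedded at
# rows/columns (i, j). The sketch below is an out-of-place, pure-NumPy
# equivalent (illustrative only; the helper name is made up for this example):
def _apply_givens_pair(m, i, j, c, s):
    d = m.shape[0]
    g = np.eye(d)
    g[i, i] = g[j, j] = c
    g[i, j] = s
    g[j, i] = -s
    return g.T.dot(m).dot(g)
# With (c, s) from random_correlation._givens_to_1(m[i, i], m[j, j], m[i, j]),
# the result has a 1 in position (i, i) (under the conditions noted in that
# method's docstring), which is how _to_corr walks the diagonal towards a
# correlation matrix.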

3756 

3757 

3758class unitary_group_gen(multi_rv_generic): 

3759 r""" 

3760 A matrix-valued U(N) random variable. 

3761 

3762 Return a random unitary matrix. 

3763 

3764 The `dim` keyword specifies the dimension N. 

3765 

3766 Methods 

3767 ------- 

3768 ``rvs(dim=None, size=1, random_state=None)`` 

3769 Draw random samples from U(N). 

3770 

3771 Parameters 

3772 ---------- 

3773 dim : scalar 

3774 Dimension of matrices 

3775 

3776 Notes 

3777 -----

3778 This class is similar to `ortho_group`. 

3779 

3780 References 

3781 ---------- 

3782 .. [1] F. Mezzadri, "How to generate random matrices from the classical 

3783 compact groups", :arXiv:`math-ph/0609050v2`.

3784 

3785 Examples 

3786 -------- 

3787 >>> from scipy.stats import unitary_group 

3788 >>> x = unitary_group.rvs(3) 

3789 

3790 >>> np.dot(x, x.conj().T) 

3791 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16], 

3792 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16], 

3793 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]]) 

3794 

3795 This generates one random matrix from U(3). The dot product confirms that 

3796 it is unitary up to machine precision. 

3797 

3798 """ 

3799 

3800 def __init__(self, seed=None): 

3801 super(unitary_group_gen, self).__init__(seed) 

3802 self.__doc__ = doccer.docformat(self.__doc__) 

3803 

3804 def _process_parameters(self, dim): 

3805 """ 

3806 Dimension N must be specified; it cannot be inferred. 

3807 """ 

3808 

3809 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim): 

3810 raise ValueError("Dimension of rotation must be specified, "

3811 "and must be a scalar greater than 1.")

3812 

3813 return dim 

3814 

3815 def rvs(self, dim, size=1, random_state=None): 

3816 """ 

3817 Draw random samples from U(N). 

3818 

3819 Parameters 

3820 ---------- 

3821 dim : integer 

3822 Dimension of space (N). 

3823 size : integer, optional 

3824 Number of samples to draw (default 1). 

3825 

3826 Returns 

3827 ------- 

3828 rvs : ndarray

3829 Random unitary matrices; shape (dim, dim) if size == 1, else (size, dim, dim)

3830 

3831 """ 

3832 random_state = self._get_random_state(random_state) 

3833 

3834 size = int(size) 

3835 if size > 1: 

3836 return np.array([self.rvs(dim, size=1, random_state=random_state) 

3837 for i in range(size)]) 

3838 

3839 dim = self._process_parameters(dim) 

3840 

3841 z = 1/math.sqrt(2)*(random_state.normal(size=(dim, dim)) + 

3842 1j*random_state.normal(size=(dim, dim))) 

3843 q, r = scipy.linalg.qr(z) 

3844 d = r.diagonal() 

3845 q *= d/abs(d) 

3846 return q 

3847 

3848 

3849unitary_group = unitary_group_gen()
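# Quick illustrative check of the construction above (not part of the API):
# rvs draws a complex Ginibre matrix, QR-factorises it, and multiplies the
# columns of Q by the phases of R's diagonal, so each sample should be
# unitary with a determinant on the unit circle.
def _demo_unitary_group(dim=3, seed=1234):
    u = unitary_group.rvs(dim, random_state=np.random.RandomState(seed))
    assert np.allclose(u.dot(u.conj().T), np.eye(dim), atol=1e-12)
    assert np.isclose(abs(np.linalg.det(u)), 1.0)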