Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/scipy/stats/_multivariate.py : 29%

1#
2# Author: Joris Vankerschaver 2013
3#
4import math
5import numpy as np
6from numpy import asarray_chkfinite, asarray
7import scipy.linalg
8from scipy._lib import doccer
9from scipy.special import gammaln, psi, multigammaln, xlogy, entr
10from scipy._lib._util import check_random_state
11from scipy.linalg.blas import drot
12from scipy.linalg.misc import LinAlgError
13from scipy.linalg.lapack import get_lapack_funcs
15from ._discrete_distns import binom
16from . import mvn
18__all__ = ['multivariate_normal',
19 'matrix_normal',
20 'dirichlet',
21 'wishart',
22 'invwishart',
23 'multinomial',
24 'special_ortho_group',
25 'ortho_group',
26 'random_correlation',
27 'unitary_group']
29_LOG_2PI = np.log(2 * np.pi)
30_LOG_2 = np.log(2)
31_LOG_PI = np.log(np.pi)
34_doc_random_state = """\
35random_state : {None, int, np.random.RandomState, np.random.Generator}, optional
36 Used for drawing random variates.
37 If `seed` is `None` the `~np.random.RandomState` singleton is used.
38 If `seed` is an int, a new ``RandomState`` instance is used, seeded
39 with seed.
40 If `seed` is already a ``RandomState`` or ``Generator`` instance,
41 then that object is used.
42 Default is None.
43"""
46def _squeeze_output(out):
47 """
48 Remove single-dimensional entries from array and convert to scalar,
49 if necessary.
51 """
52 out = out.squeeze()
53 if out.ndim == 0:
54 out = out[()]
55 return out
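# Added illustrative comment (not part of the original module): the intent is
# that 0-d results become scalars while other shapes just lose singleton axes,
# e.g. in an interpreter session:
#
#     >>> import numpy as np
#     >>> from scipy.stats._multivariate import _squeeze_output
#     >>> _squeeze_output(np.array([[3.0]]))       # 0-d after squeeze -> scalar
#     3.0
#     >>> _squeeze_output(np.zeros((1, 4))).shape  # singleton axis dropped
#     (4,)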
58def _eigvalsh_to_eps(spectrum, cond=None, rcond=None):
59 """
60 Determine which eigenvalues are "small" given the spectrum.
62 This is for compatibility across various linear algebra functions
63 that should agree about whether or not a Hermitian matrix is numerically
64 singular and what its numerical matrix rank is.
65 This is designed to be compatible with scipy.linalg.pinvh.
67 Parameters
68 ----------
69 spectrum : 1d ndarray
70 Array of eigenvalues of a Hermitian matrix.
71 cond, rcond : float, optional
72 Cutoff for small eigenvalues.
73 Singular values smaller than rcond * largest_eigenvalue are
74 considered zero.
75 If None or -1, suitable machine precision is used.
77 Returns
78 -------
79 eps : float
80 Magnitude cutoff for numerical negligibility.
82 """
83 if rcond is not None:
84 cond = rcond
85 if cond in [None, -1]:
86 t = spectrum.dtype.char.lower()
87 factor = {'f': 1E3, 'd': 1E6}
88 cond = factor[t] * np.finfo(t).eps
89 eps = cond * np.max(abs(spectrum))
90 return eps
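# Hedged example (added): for double precision the default cutoff is
# 1e6 * machine-eps times the largest absolute eigenvalue, so a tiny
# eigenvalue falls below the cutoff while an O(1) eigenvalue does not:
#
#     >>> import numpy as np
#     >>> from scipy.stats._multivariate import _eigvalsh_to_eps
#     >>> spectrum = np.array([1e-20, 1.0, 2.0])
#     >>> eps = _eigvalsh_to_eps(spectrum)         # roughly 2 * 1e6 * 2.2e-16
#     >>> bool(spectrum[0] < eps < spectrum[1])
#     True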
93def _pinv_1d(v, eps=1e-5):
94 """
95 A helper function for computing the pseudoinverse.
97 Parameters
98 ----------
99 v : iterable of numbers
100 This may be thought of as a vector of eigenvalues or singular values.
101 eps : float
102 Values with magnitude no greater than eps are considered negligible.
104 Returns
105 -------
106 v_pinv : 1d float ndarray
107 A vector of pseudo-inverted numbers.
109 """
110 return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float)
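# Added illustrative check: entries within ``eps`` of zero are pseudo-inverted
# to zero rather than blowing up, e.g.
#
#     >>> from scipy.stats._multivariate import _pinv_1d
#     >>> _pinv_1d([0.0, 1e-12, 2.0], eps=1e-5).tolist()
#     [0.0, 0.0, 0.5]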
113class _PSD(object):
114 """
115 Compute coordinated functions of a symmetric positive semidefinite matrix.
117 This class addresses two issues. Firstly it allows the pseudoinverse,
118 the logarithm of the pseudo-determinant, and the rank of the matrix
119 to be computed using one call to eigh instead of three.
120 Secondly it allows these functions to be computed in a way
121 that gives mutually compatible results.
122 All of the functions are computed with a common understanding as to
123 which of the eigenvalues are to be considered negligibly small.
124 The functions are designed to coordinate with scipy.linalg.pinvh()
125 but not necessarily with np.linalg.det() or with np.linalg.matrix_rank().
127 Parameters
128 ----------
129 M : array_like
130 Symmetric positive semidefinite matrix (2-D).
131 cond, rcond : float, optional
132 Cutoff for small eigenvalues.
133 Singular values smaller than rcond * largest_eigenvalue are
134 considered zero.
135 If None or -1, suitable machine precision is used.
136 lower : bool, optional
137 Whether the pertinent array data is taken from the lower
138 or upper triangle of M. (Default: lower)
139 check_finite : bool, optional
140 Whether to check that the input matrices contain only finite
141 numbers. Disabling may give a performance gain, but may result
142 in problems (crashes, non-termination) if the inputs do contain
143 infinities or NaNs.
144 allow_singular : bool, optional
145 Whether to allow a singular matrix. (Default: True)
147 Notes
148 -----
149 The arguments are similar to those of scipy.linalg.pinvh().
151 """
153 def __init__(self, M, cond=None, rcond=None, lower=True,
154 check_finite=True, allow_singular=True):
155 # Compute the symmetric eigendecomposition.
156 # Note that eigh takes care of array conversion, chkfinite,
157 # and assertion that the matrix is square.
158 s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite)
160 eps = _eigvalsh_to_eps(s, cond, rcond)
161 if np.min(s) < -eps:
162 raise ValueError('the input matrix must be positive semidefinite')
163 d = s[s > eps]
164 if len(d) < len(s) and not allow_singular:
165 raise np.linalg.LinAlgError('singular matrix')
166 s_pinv = _pinv_1d(s, eps)
167 U = np.multiply(u, np.sqrt(s_pinv))
169 # Initialize the eagerly precomputed attributes.
170 self.rank = len(d)
171 self.U = U
172 self.log_pdet = np.sum(np.log(d))
174 # Initialize an attribute to be lazily computed.
175 self._pinv = None
177 @property
178 def pinv(self):
179 if self._pinv is None:
180 self._pinv = np.dot(self.U, self.U.T)
181 return self._pinv
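# Hedged usage sketch (added): for a rank-deficient covariance the three
# coordinated quantities (rank, pseudo log-determinant, pseudoinverse) all
# come from the single eigendecomposition above, e.g.
#
#     >>> import numpy as np
#     >>> from scipy.stats._multivariate import _PSD
#     >>> M = np.array([[2.0, 0.0], [0.0, 0.0]])   # PSD, rank 1
#     >>> psd = _PSD(M, allow_singular=True)
#     >>> psd.rank
#     1
#     >>> np.isclose(psd.log_pdet, np.log(2.0))
#     True
#     >>> np.allclose(psd.pinv, [[0.5, 0.0], [0.0, 0.0]])  # pseudoinverse
#     True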
184class multi_rv_generic(object):
185 """
186 Class which encapsulates common functionality between all multivariate
187 distributions.
189 """
190 def __init__(self, seed=None):
191 super(multi_rv_generic, self).__init__()
192 self._random_state = check_random_state(seed)
194 @property
195 def random_state(self):
196 """ Get or set the RandomState object for generating random variates.
198 This can be either None, int, a RandomState instance, or a
199 np.random.Generator instance.
201 If None (or np.random), use the RandomState singleton used by
202 np.random.
203 If already a RandomState or Generator instance, use it.
204 If an int, use a new RandomState instance seeded with seed.
206 """
207 return self._random_state
209 @random_state.setter
210 def random_state(self, seed):
211 self._random_state = check_random_state(seed)
213 def _get_random_state(self, random_state):
214 if random_state is not None:
215 return check_random_state(random_state)
216 else:
217 return self._random_state
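# Hedged note (added): ``check_random_state`` implements the seed handling
# described above, so passing the same int ``random_state`` to two calls of a
# distribution's ``rvs`` should reproduce the same draws, e.g.
#
#     >>> from scipy.stats import multivariate_normal
#     >>> rv1 = multivariate_normal.rvs(size=2, random_state=123)
#     >>> rv2 = multivariate_normal.rvs(size=2, random_state=123)
#     >>> (rv1 == rv2).all()
#     True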
220class multi_rv_frozen(object):
221 """
222 Class which encapsulates common functionality between all frozen
223 multivariate distributions.
224 """
225 @property
226 def random_state(self):
227 return self._dist._random_state
229 @random_state.setter
230 def random_state(self, seed):
231 self._dist._random_state = check_random_state(seed)
234_mvn_doc_default_callparams = """\
235mean : array_like, optional
236 Mean of the distribution (default zero)
237cov : array_like, optional
238 Covariance matrix of the distribution (default one)
239allow_singular : bool, optional
240 Whether to allow a singular covariance matrix. (Default: False)
241"""
243_mvn_doc_callparams_note = \
244 """Setting the parameter `mean` to `None` is equivalent to having `mean`
245 be the zero-vector. The parameter `cov` can be a scalar, in which case
246 the covariance matrix is the identity times that value, a vector of
247 diagonal entries for the covariance matrix, or a two-dimensional
248 array_like.
249 """
251_mvn_doc_frozen_callparams = ""
253_mvn_doc_frozen_callparams_note = \
254 """See class definition for a detailed description of parameters."""
256mvn_docdict_params = {
257 '_mvn_doc_default_callparams': _mvn_doc_default_callparams,
258 '_mvn_doc_callparams_note': _mvn_doc_callparams_note,
259 '_doc_random_state': _doc_random_state
260}
262mvn_docdict_noparams = {
263 '_mvn_doc_default_callparams': _mvn_doc_frozen_callparams,
264 '_mvn_doc_callparams_note': _mvn_doc_frozen_callparams_note,
265 '_doc_random_state': _doc_random_state
266}
269class multivariate_normal_gen(multi_rv_generic):
270 r"""
271 A multivariate normal random variable.
273 The `mean` keyword specifies the mean. The `cov` keyword specifies the
274 covariance matrix.
276 Methods
277 -------
278 ``pdf(x, mean=None, cov=1, allow_singular=False)``
279 Probability density function.
280 ``logpdf(x, mean=None, cov=1, allow_singular=False)``
281 Log of the probability density function.
282 ``cdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5)``
283 Cumulative distribution function.
284 ``logcdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5)``
285 Log of the cumulative distribution function.
286 ``rvs(mean=None, cov=1, size=1, random_state=None)``
287 Draw random samples from a multivariate normal distribution.
288 ``entropy()``
289 Compute the differential entropy of the multivariate normal.
291 Parameters
292 ----------
293 x : array_like
294 Quantiles, with the last axis of `x` denoting the components.
295 %(_mvn_doc_default_callparams)s
296 %(_doc_random_state)s
298 Alternatively, the object may be called (as a function) to fix the mean
299 and covariance parameters, returning a "frozen" multivariate normal
300 random variable:
302 rv = multivariate_normal(mean=None, cov=1, allow_singular=False)
303 - Frozen object with the same methods but holding the given
304 mean and covariance fixed.
306 Notes
307 -----
308 %(_mvn_doc_callparams_note)s
310 The covariance matrix `cov` must be a (symmetric) positive
311 semi-definite matrix. The determinant and inverse of `cov` are computed
312 as the pseudo-determinant and pseudo-inverse, respectively, so
313 that `cov` does not need to have full rank.
315 The probability density function for `multivariate_normal` is
317 .. math::
319 f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}}
320 \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right),
322 where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix,
323 and :math:`k` is the dimension of the space where :math:`x` takes values.
325 .. versionadded:: 0.14.0
327 Examples
328 --------
329 >>> import matplotlib.pyplot as plt
330 >>> from scipy.stats import multivariate_normal
332 >>> x = np.linspace(0, 5, 10, endpoint=False)
333 >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y
334 array([ 0.00108914, 0.01033349, 0.05946514, 0.20755375, 0.43939129,
335 0.56418958, 0.43939129, 0.20755375, 0.05946514, 0.01033349])
336 >>> fig1 = plt.figure()
337 >>> ax = fig1.add_subplot(111)
338 >>> ax.plot(x, y)
340 The input quantiles can be any shape of array, as long as the last
341 axis labels the components. This allows us for instance to
342 display the frozen pdf for a non-isotropic random variable in 2D as
343 follows:
345 >>> x, y = np.mgrid[-1:1:.01, -1:1:.01]
346 >>> pos = np.dstack((x, y))
347 >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]])
348 >>> fig2 = plt.figure()
349 >>> ax2 = fig2.add_subplot(111)
350 >>> ax2.contourf(x, y, rv.pdf(pos))
352 """
354 def __init__(self, seed=None):
355 super(multivariate_normal_gen, self).__init__(seed)
356 self.__doc__ = doccer.docformat(self.__doc__, mvn_docdict_params)
358 def __call__(self, mean=None, cov=1, allow_singular=False, seed=None):
359 """
360 Create a frozen multivariate normal distribution.
362 See `multivariate_normal_frozen` for more information.
364 """
365 return multivariate_normal_frozen(mean, cov,
366 allow_singular=allow_singular,
367 seed=seed)
369 def _process_parameters(self, dim, mean, cov):
370 """
371 Infer dimensionality from mean or covariance matrix, ensure that
372 mean and covariance are a full vector and a full matrix, respectively.
374 """
376 # Try to infer dimensionality
377 if dim is None:
378 if mean is None:
379 if cov is None:
380 dim = 1
381 else:
382 cov = np.asarray(cov, dtype=float)
383 if cov.ndim < 2:
384 dim = 1
385 else:
386 dim = cov.shape[0]
387 else:
388 mean = np.asarray(mean, dtype=float)
389 dim = mean.size
390 else:
391 if not np.isscalar(dim):
392 raise ValueError("Dimension of random variable must be "
393 "a scalar.")
395 # Check input sizes and return full arrays for mean and cov if
396 # necessary
397 if mean is None:
398 mean = np.zeros(dim)
399 mean = np.asarray(mean, dtype=float)
401 if cov is None:
402 cov = 1.0
403 cov = np.asarray(cov, dtype=float)
405 if dim == 1:
406 mean.shape = (1,)
407 cov.shape = (1, 1)
409 if mean.ndim != 1 or mean.shape[0] != dim:
410 raise ValueError("Array 'mean' must be a vector of length %d." %
411 dim)
412 if cov.ndim == 0:
413 cov = cov * np.eye(dim)
414 elif cov.ndim == 1:
415 cov = np.diag(cov)
416 elif cov.ndim == 2 and cov.shape != (dim, dim):
417 rows, cols = cov.shape
418 if rows != cols:
419 msg = ("Array 'cov' must be square if it is two dimensional,"
420 " but cov.shape = %s." % str(cov.shape))
421 else:
422 msg = ("Dimension mismatch: array 'cov' is of shape %s,"
423 " but 'mean' is a vector of length %d.")
424 msg = msg % (str(cov.shape), len(mean))
425 raise ValueError(msg)
426 elif cov.ndim > 2:
427 raise ValueError("Array 'cov' must be at most two-dimensional,"
428 " but cov.ndim = %d" % cov.ndim)
430 return dim, mean, cov
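# Added illustrative sketch (uses the private ``_process_parameters`` method,
# shown here only to document the promotion rules): scalar or 1-d ``cov``
# inputs are expanded to a full covariance matrix, e.g.
#
#     >>> from scipy.stats import multivariate_normal
#     >>> dim, mean, cov = multivariate_normal._process_parameters(
#     ...     None, [0.0, 0.0], 2.0)
#     >>> dim
#     2
#     >>> cov.tolist()                             # scalar expanded to 2 * I
#     [[2.0, 0.0], [0.0, 2.0]]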
432 def _process_quantiles(self, x, dim):
433 """
434 Adjust quantiles array so that last axis labels the components of
435 each data point.
437 """
438 x = np.asarray(x, dtype=float)
440 if x.ndim == 0:
441 x = x[np.newaxis]
442 elif x.ndim == 1:
443 if dim == 1:
444 x = x[:, np.newaxis]
445 else:
446 x = x[np.newaxis, :]
448 return x
450 def _logpdf(self, x, mean, prec_U, log_det_cov, rank):
451 """
452 Parameters
453 ----------
454 x : ndarray
455 Points at which to evaluate the log of the probability
456 density function
457 mean : ndarray
458 Mean of the distribution
459 prec_U : ndarray
460 A decomposition such that np.dot(prec_U, prec_U.T)
461 is the precision matrix, i.e. inverse of the covariance matrix.
462 log_det_cov : float
463 Logarithm of the determinant of the covariance matrix
464 rank : int
465 Rank of the covariance matrix.
467 Notes
468 -----
469 As this function does no argument checking, it should not be
470 called directly; use 'logpdf' instead.
472 """
473 dev = x - mean
474 maha = np.sum(np.square(np.dot(dev, prec_U)), axis=-1)
475 return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)
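# Hedged sanity check (added): with ``prec_U`` the whitening factor produced
# by ``_PSD``, the Mahalanobis term reduces to a squared Euclidean norm, so
# the standard 1-d case should match the familiar closed form -0.5*log(2*pi):
#
#     >>> import numpy as np
#     >>> from scipy.stats import multivariate_normal
#     >>> logp = multivariate_normal.logpdf(0.0, mean=0.0, cov=1.0)
#     >>> np.isclose(logp, -0.5 * np.log(2 * np.pi))
#     True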
477 def logpdf(self, x, mean=None, cov=1, allow_singular=False):
478 """
479 Log of the multivariate normal probability density function.
481 Parameters
482 ----------
483 x : array_like
484 Quantiles, with the last axis of `x` denoting the components.
485 %(_mvn_doc_default_callparams)s
487 Returns
488 -------
489 pdf : ndarray or scalar
490 Log of the probability density function evaluated at `x`
492 Notes
493 -----
494 %(_mvn_doc_callparams_note)s
496 """
497 dim, mean, cov = self._process_parameters(None, mean, cov)
498 x = self._process_quantiles(x, dim)
499 psd = _PSD(cov, allow_singular=allow_singular)
500 out = self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank)
501 return _squeeze_output(out)
503 def pdf(self, x, mean=None, cov=1, allow_singular=False):
504 """
505 Multivariate normal probability density function.
507 Parameters
508 ----------
509 x : array_like
510 Quantiles, with the last axis of `x` denoting the components.
511 %(_mvn_doc_default_callparams)s
513 Returns
514 -------
515 pdf : ndarray or scalar
516 Probability density function evaluated at `x`
518 Notes
519 -----
520 %(_mvn_doc_callparams_note)s
522 """
523 dim, mean, cov = self._process_parameters(None, mean, cov)
524 x = self._process_quantiles(x, dim)
525 psd = _PSD(cov, allow_singular=allow_singular)
526 out = np.exp(self._logpdf(x, mean, psd.U, psd.log_pdet, psd.rank))
527 return _squeeze_output(out)
529 def _cdf(self, x, mean, cov, maxpts, abseps, releps):
530 """
531 Parameters
532 ----------
533 x : ndarray
534 Points at which to evaluate the cumulative distribution function.
535 mean : ndarray
536 Mean of the distribution
537 cov : array_like
538 Covariance matrix of the distribution
539 maxpts: integer
540 The maximum number of points to use for integration
541 abseps: float
542 Absolute error tolerance
543 releps: float
544 Relative error tolerance
546 Notes
547 -----
548 As this function does no argument checking, it should not be
549 called directly; use 'cdf' instead.
551 .. versionadded:: 1.0.0
553 """
554 lower = np.full(mean.shape, -np.inf)
555 # mvnun expects 1-d arguments, so process points sequentially
556 func1d = lambda x_slice: mvn.mvnun(lower, x_slice, mean, cov,
557 maxpts, abseps, releps)[0]
558 out = np.apply_along_axis(func1d, -1, x)
559 return _squeeze_output(out)
561 def logcdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,
562 abseps=1e-5, releps=1e-5):
563 """
564 Log of the multivariate normal cumulative distribution function.
566 Parameters
567 ----------
568 x : array_like
569 Quantiles, with the last axis of `x` denoting the components.
570 %(_mvn_doc_default_callparams)s
571 maxpts: integer, optional
572 The maximum number of points to use for integration
573 (default `1000000*dim`)
574 abseps: float, optional
575 Absolute error tolerance (default 1e-5)
576 releps: float, optional
577 Relative error tolerance (default 1e-5)
579 Returns
580 -------
581 cdf : ndarray or scalar
582 Log of the cumulative distribution function evaluated at `x`
584 Notes
585 -----
586 %(_mvn_doc_callparams_note)s
588 .. versionadded:: 1.0.0
590 """
591 dim, mean, cov = self._process_parameters(None, mean, cov)
592 x = self._process_quantiles(x, dim)
593 # Use _PSD to check covariance matrix
594 _PSD(cov, allow_singular=allow_singular)
595 if not maxpts:
596 maxpts = 1000000 * dim
597 out = np.log(self._cdf(x, mean, cov, maxpts, abseps, releps))
598 return out
600 def cdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,
601 abseps=1e-5, releps=1e-5):
602 """
603 Multivariate normal cumulative distribution function.
605 Parameters
606 ----------
607 x : array_like
608 Quantiles, with the last axis of `x` denoting the components.
609 %(_mvn_doc_default_callparams)s
610 maxpts: integer, optional
611 The maximum number of points to use for integration
612 (default `1000000*dim`)
613 abseps: float, optional
614 Absolute error tolerance (default 1e-5)
615 releps: float, optional
616 Relative error tolerance (default 1e-5)
618 Returns
619 -------
620 cdf : ndarray or scalar
621 Cumulative distribution function evaluated at `x`
623 Notes
624 -----
625 %(_mvn_doc_callparams_note)s
627 .. versionadded:: 1.0.0
629 """
630 dim, mean, cov = self._process_parameters(None, mean, cov)
631 x = self._process_quantiles(x, dim)
632 # Use _PSD to check covariance matrix
633 _PSD(cov, allow_singular=allow_singular)
634 if not maxpts:
635 maxpts = 1000000 * dim
636 out = self._cdf(x, mean, cov, maxpts, abseps, releps)
637 return out
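# Hedged example (added): for independent standard normal components the CDF
# factorises, so a 2-d standard normal evaluated at the origin should give
# Phi(0)**2 = 0.25 up to the integration tolerances used above:
#
#     >>> import numpy as np
#     >>> from scipy.stats import multivariate_normal
#     >>> p = multivariate_normal.cdf([0.0, 0.0], mean=[0.0, 0.0],
#     ...                             cov=np.eye(2))
#     >>> np.isclose(p, 0.25, atol=1e-5)
#     True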
639 def rvs(self, mean=None, cov=1, size=1, random_state=None):
640 """
641 Draw random samples from a multivariate normal distribution.
643 Parameters
644 ----------
645 %(_mvn_doc_default_callparams)s
646 size : integer, optional
647 Number of samples to draw (default 1).
648 %(_doc_random_state)s
650 Returns
651 -------
652 rvs : ndarray or scalar
653 Random variates of size (`size`, `N`), where `N` is the
654 dimension of the random variable.
656 Notes
657 -----
658 %(_mvn_doc_callparams_note)s
660 """
661 dim, mean, cov = self._process_parameters(None, mean, cov)
663 random_state = self._get_random_state(random_state)
664 out = random_state.multivariate_normal(mean, cov, size)
665 return _squeeze_output(out)
667 def entropy(self, mean=None, cov=1):
668 """
669 Compute the differential entropy of the multivariate normal.
671 Parameters
672 ----------
673 %(_mvn_doc_default_callparams)s
675 Returns
676 -------
677 h : scalar
678 Entropy of the multivariate normal distribution
680 Notes
681 -----
682 %(_mvn_doc_callparams_note)s
684 """
685 dim, mean, cov = self._process_parameters(None, mean, cov)
686 _, logdet = np.linalg.slogdet(2 * np.pi * np.e * cov)
687 return 0.5 * logdet
690multivariate_normal = multivariate_normal_gen()
693class multivariate_normal_frozen(multi_rv_frozen):
694 def __init__(self, mean=None, cov=1, allow_singular=False, seed=None,
695 maxpts=None, abseps=1e-5, releps=1e-5):
696 """
697 Create a frozen multivariate normal distribution.
699 Parameters
700 ----------
701 mean : array_like, optional
702 Mean of the distribution (default zero)
703 cov : array_like, optional
704 Covariance matrix of the distribution (default one)
705 allow_singular : bool, optional
706 If this flag is True then tolerate a singular
707 covariance matrix (default False).
708 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional
709 This parameter defines the object to use for drawing random
710 variates.
711 If `seed` is `None` the `~np.random.RandomState` singleton is used.
712 If `seed` is an int, a new ``RandomState`` instance is used, seeded
713 with seed.
714 If `seed` is already a ``RandomState`` or ``Generator`` instance,
715 then that object is used.
716 Default is None.
717 maxpts: integer, optional
718 The maximum number of points to use for integration of the
719 cumulative distribution function (default `1000000*dim`)
720 abseps: float, optional
721 Absolute error tolerance for the cumulative distribution function
722 (default 1e-5)
723 releps: float, optional
724 Relative error tolerance for the cumulative distribution function
725 (default 1e-5)
727 Examples
728 --------
729 When called with the default parameters, this will create a 1D random
730 variable with mean 0 and covariance 1:
732 >>> from scipy.stats import multivariate_normal
733 >>> r = multivariate_normal()
734 >>> r.mean
735 array([ 0.])
736 >>> r.cov
737 array([[1.]])
739 """
740 self._dist = multivariate_normal_gen(seed)
741 self.dim, self.mean, self.cov = self._dist._process_parameters(
742 None, mean, cov)
743 self.cov_info = _PSD(self.cov, allow_singular=allow_singular)
744 if not maxpts:
745 maxpts = 1000000 * self.dim
746 self.maxpts = maxpts
747 self.abseps = abseps
748 self.releps = releps
750 def logpdf(self, x):
751 x = self._dist._process_quantiles(x, self.dim)
752 out = self._dist._logpdf(x, self.mean, self.cov_info.U,
753 self.cov_info.log_pdet, self.cov_info.rank)
754 return _squeeze_output(out)
756 def pdf(self, x):
757 return np.exp(self.logpdf(x))
759 def logcdf(self, x):
760 return np.log(self.cdf(x))
762 def cdf(self, x):
763 x = self._dist._process_quantiles(x, self.dim)
764 out = self._dist._cdf(x, self.mean, self.cov, self.maxpts, self.abseps,
765 self.releps)
766 return _squeeze_output(out)
768 def rvs(self, size=1, random_state=None):
769 return self._dist.rvs(self.mean, self.cov, size, random_state)
771 def entropy(self):
772 """
773 Computes the differential entropy of the multivariate normal.
775 Returns
776 -------
777 h : scalar
778 Entropy of the multivariate normal distribution
780 """
781 log_pdet = self.cov_info.log_pdet
782 rank = self.cov_info.rank
783 return 0.5 * (rank * (_LOG_2PI + 1) + log_pdet)
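# Hedged consistency note (added): this rank-aware form agrees with the
# ``slogdet`` form used by ``multivariate_normal_gen.entropy`` whenever the
# covariance has full rank, since log det(2*pi*e*Sigma) = k*(log(2*pi) + 1)
# + log det(Sigma) for a k-dimensional Sigma, e.g.
#
#     >>> import numpy as np
#     >>> from scipy.stats import multivariate_normal
#     >>> cov = [[2.0, 0.3], [0.3, 0.5]]
#     >>> frozen = multivariate_normal(mean=[0.0, 0.0], cov=cov)
#     >>> np.isclose(frozen.entropy(), multivariate_normal.entropy(cov=cov))
#     True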
786# Set frozen generator docstrings from corresponding docstrings in
787# multivariate_normal_gen and fill in default strings in class docstrings
788for name in ['logpdf', 'pdf', 'logcdf', 'cdf', 'rvs']:
789 method = multivariate_normal_gen.__dict__[name]
790 method_frozen = multivariate_normal_frozen.__dict__[name]
791 method_frozen.__doc__ = doccer.docformat(method.__doc__,
792 mvn_docdict_noparams)
793 method.__doc__ = doccer.docformat(method.__doc__, mvn_docdict_params)
795_matnorm_doc_default_callparams = """\
796mean : array_like, optional
797 Mean of the distribution (default: `None`)
798rowcov : array_like, optional
799 Among-row covariance matrix of the distribution (default: `1`)
800colcov : array_like, optional
801 Among-column covariance matrix of the distribution (default: `1`)
802"""
804_matnorm_doc_callparams_note = \
805 """If `mean` is set to `None` then a matrix of zeros is used for the mean.
806 The dimensions of this matrix are inferred from the shape of `rowcov` and
807 `colcov`, if these are provided, or set to `1` if ambiguous.
809 `rowcov` and `colcov` can be two-dimensional array_likes specifying the
810 covariance matrices directly. Alternatively, a one-dimensional array will
811 be interpreted as the entries of a diagonal matrix, and a scalar or
812 zero-dimensional array will be interpreted as this value times the
813 identity matrix.
814 """
816_matnorm_doc_frozen_callparams = ""
818_matnorm_doc_frozen_callparams_note = \
819 """See class definition for a detailed description of parameters."""
821matnorm_docdict_params = {
822 '_matnorm_doc_default_callparams': _matnorm_doc_default_callparams,
823 '_matnorm_doc_callparams_note': _matnorm_doc_callparams_note,
824 '_doc_random_state': _doc_random_state
825}
827matnorm_docdict_noparams = {
828 '_matnorm_doc_default_callparams': _matnorm_doc_frozen_callparams,
829 '_matnorm_doc_callparams_note': _matnorm_doc_frozen_callparams_note,
830 '_doc_random_state': _doc_random_state
831}
834class matrix_normal_gen(multi_rv_generic):
835 r"""
836 A matrix normal random variable.
838 The `mean` keyword specifies the mean. The `rowcov` keyword specifies the
839 among-row covariance matrix. The `colcov` keyword specifies the
840 among-column covariance matrix.
842 Methods
843 -------
844 ``pdf(X, mean=None, rowcov=1, colcov=1)``
845 Probability density function.
846 ``logpdf(X, mean=None, rowcov=1, colcov=1)``
847 Log of the probability density function.
848 ``rvs(mean=None, rowcov=1, colcov=1, size=1, random_state=None)``
849 Draw random samples.
851 Parameters
852 ----------
853 X : array_like
854 Quantiles, with the last two axes of `X` denoting the components.
855 %(_matnorm_doc_default_callparams)s
856 %(_doc_random_state)s
858 Alternatively, the object may be called (as a function) to fix the mean
859 and covariance parameters, returning a "frozen" matrix normal
860 random variable:
862 rv = matrix_normal(mean=None, rowcov=1, colcov=1)
863 - Frozen object with the same methods but holding the given
864 mean and covariance fixed.
866 Notes
867 -----
868 %(_matnorm_doc_callparams_note)s
870 The covariance matrices specified by `rowcov` and `colcov` must be
871 (symmetric) positive definite. If the samples in `X` are
872 :math:`m \times n`, then `rowcov` must be :math:`m \times m` and
873 `colcov` must be :math:`n \times n`. `mean` must be the same shape as `X`.
875 The probability density function for `matrix_normal` is
877 .. math::
879 f(X) = (2 \pi)^{-\frac{mn}{2}}|U|^{-\frac{n}{2}} |V|^{-\frac{m}{2}}
880 \exp\left( -\frac{1}{2} \mathrm{Tr}\left[ U^{-1} (X-M) V^{-1}
881 (X-M)^T \right] \right),
883 where :math:`M` is the mean, :math:`U` the among-row covariance matrix,
884 and :math:`V` the among-column covariance matrix.
886 The `allow_singular` behaviour of the `multivariate_normal`
887 distribution is not currently supported. Covariance matrices must be
888 full rank.
890 The `matrix_normal` distribution is closely related to the
891 `multivariate_normal` distribution. Specifically, :math:`\mathrm{Vec}(X)`
892 (the vector formed by concatenating the columns of :math:`X`) has a
893 multivariate normal distribution with mean :math:`\mathrm{Vec}(M)`
894 and covariance :math:`V \otimes U` (where :math:`\otimes` is the Kronecker
895 product). Sampling and pdf evaluation are
896 :math:`\mathcal{O}(m^3 + n^3 + m^2 n + m n^2)` for the matrix normal, but
897 :math:`\mathcal{O}(m^3 n^3)` for the equivalent multivariate normal,
898 making this equivalent form algorithmically inefficient.
900 .. versionadded:: 0.17.0
902 Examples
903 --------
905 >>> from scipy.stats import matrix_normal
907 >>> M = np.arange(6).reshape(3,2); M
908 array([[0, 1],
909 [2, 3],
910 [4, 5]])
911 >>> U = np.diag([1,2,3]); U
912 array([[1, 0, 0],
913 [0, 2, 0],
914 [0, 0, 3]])
915 >>> V = 0.3*np.identity(2); V
916 array([[ 0.3, 0. ],
917 [ 0. , 0.3]])
918 >>> X = M + 0.1; X
919 array([[ 0.1, 1.1],
920 [ 2.1, 3.1],
921 [ 4.1, 5.1]])
922 >>> matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V)
923 0.023410202050005054
925 >>> # Equivalent multivariate normal
926 >>> from scipy.stats import multivariate_normal
927 >>> vectorised_X = X.T.flatten()
928 >>> equiv_mean = M.T.flatten()
929 >>> equiv_cov = np.kron(V,U)
930 >>> multivariate_normal.pdf(vectorised_X, mean=equiv_mean, cov=equiv_cov)
931 0.023410202050005054
932 """
934 def __init__(self, seed=None):
935 super(matrix_normal_gen, self).__init__(seed)
936 self.__doc__ = doccer.docformat(self.__doc__, matnorm_docdict_params)
938 def __call__(self, mean=None, rowcov=1, colcov=1, seed=None):
939 """
940 Create a frozen matrix normal distribution.
942 See `matrix_normal_frozen` for more information.
944 """
945 return matrix_normal_frozen(mean, rowcov, colcov, seed=seed)
947 def _process_parameters(self, mean, rowcov, colcov):
948 """
949 Infer dimensionality from mean or covariance matrices. Handle
950 defaults. Ensure compatible dimensions.
952 """
954 # Process mean
955 if mean is not None:
956 mean = np.asarray(mean, dtype=float)
957 meanshape = mean.shape
958 if len(meanshape) != 2:
959 raise ValueError("Array `mean` must be two dimensional.")
960 if 0 in meanshape:
961 raise ValueError("Array `mean` has invalid shape.")
963 # Process among-row covariance
964 rowcov = np.asarray(rowcov, dtype=float)
965 if rowcov.ndim == 0:
966 if mean is not None:
967 rowcov = rowcov * np.identity(meanshape[0])
968 else:
969 rowcov = rowcov * np.identity(1)
970 elif rowcov.ndim == 1:
971 rowcov = np.diag(rowcov)
972 rowshape = rowcov.shape
973 if len(rowshape) != 2:
974 raise ValueError("`rowcov` must be a scalar or a 2D array.")
975 if rowshape[0] != rowshape[1]:
976 raise ValueError("Array `rowcov` must be square.")
977 if rowshape[0] == 0:
978 raise ValueError("Array `rowcov` has invalid shape.")
979 numrows = rowshape[0]
981 # Process among-column covariance
982 colcov = np.asarray(colcov, dtype=float)
983 if colcov.ndim == 0:
984 if mean is not None:
985 colcov = colcov * np.identity(meanshape[1])
986 else:
987 colcov = colcov * np.identity(1)
988 elif colcov.ndim == 1:
989 colcov = np.diag(colcov)
990 colshape = colcov.shape
991 if len(colshape) != 2:
992 raise ValueError("`colcov` must be a scalar or a 2D array.")
993 if colshape[0] != colshape[1]:
994 raise ValueError("Array `colcov` must be square.")
995 if colshape[0] == 0:
996 raise ValueError("Array `colcov` has invalid shape.")
997 numcols = colshape[0]
999 # Ensure mean and covariances compatible
1000 if mean is not None:
1001 if meanshape[0] != numrows:
1002 raise ValueError("Arrays `mean` and `rowcov` must have the "
1003 "same number of rows.")
1004 if meanshape[1] != numcols:
1005 raise ValueError("Arrays `mean` and `colcov` must have the "
1006 "same number of columns.")
1007 else:
1008 mean = np.zeros((numrows, numcols))
1010 dims = (numrows, numcols)
1012 return dims, mean, rowcov, colcov
1014 def _process_quantiles(self, X, dims):
1015 """
1016 Adjust quantiles array so that the last two axes label the components of
1017 each data point.
1019 """
1020 X = np.asarray(X, dtype=float)
1021 if X.ndim == 2:
1022 X = X[np.newaxis, :]
1023 if X.shape[-2:] != dims:
1024 raise ValueError("The shape of array `X` is not compatible "
1025 "with the distribution parameters.")
1026 return X
1028 def _logpdf(self, dims, X, mean, row_prec_rt, log_det_rowcov,
1029 col_prec_rt, log_det_colcov):
1030 """
1031 Parameters
1032 ----------
1033 dims : tuple
1034 Dimensions of the matrix variates
1035 X : ndarray
1036 Points at which to evaluate the log of the probability
1037 density function
1038 mean : ndarray
1039 Mean of the distribution
1040 row_prec_rt : ndarray
1041 A decomposition such that np.dot(row_prec_rt, row_prec_rt.T)
1042 is the inverse of the among-row covariance matrix
1043 log_det_rowcov : float
1044 Logarithm of the determinant of the among-row covariance matrix
1045 col_prec_rt : ndarray
1046 A decomposition such that np.dot(col_prec_rt, col_prec_rt.T)
1047 is the inverse of the among-column covariance matrix
1048 log_det_colcov : float
1049 Logarithm of the determinant of the among-column covariance matrix
1051 Notes
1052 -----
1053 As this function does no argument checking, it should not be
1054 called directly; use 'logpdf' instead.
1056 """
1057 numrows, numcols = dims
1058 roll_dev = np.rollaxis(X-mean, axis=-1, start=0)
1059 scale_dev = np.tensordot(col_prec_rt.T,
1060 np.dot(roll_dev, row_prec_rt), 1)
1061 maha = np.sum(np.sum(np.square(scale_dev), axis=-1), axis=0)
1062 return -0.5 * (numrows*numcols*_LOG_2PI + numcols*log_det_rowcov
1063 + numrows*log_det_colcov + maha)
1065 def logpdf(self, X, mean=None, rowcov=1, colcov=1):
1066 """
1067 Log of the matrix normal probability density function.
1069 Parameters
1070 ----------
1071 X : array_like
1072 Quantiles, with the last two axes of `X` denoting the components.
1073 %(_matnorm_doc_default_callparams)s
1075 Returns
1076 -------
1077 logpdf : ndarray
1078 Log of the probability density function evaluated at `X`
1080 Notes
1081 -----
1082 %(_matnorm_doc_callparams_note)s
1084 """
1085 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,
1086 colcov)
1087 X = self._process_quantiles(X, dims)
1088 rowpsd = _PSD(rowcov, allow_singular=False)
1089 colpsd = _PSD(colcov, allow_singular=False)
1090 out = self._logpdf(dims, X, mean, rowpsd.U, rowpsd.log_pdet, colpsd.U,
1091 colpsd.log_pdet)
1092 return _squeeze_output(out)
1094 def pdf(self, X, mean=None, rowcov=1, colcov=1):
1095 """
1096 Matrix normal probability density function.
1098 Parameters
1099 ----------
1100 X : array_like
1101 Quantiles, with the last two axes of `X` denoting the components.
1102 %(_matnorm_doc_default_callparams)s
1104 Returns
1105 -------
1106 pdf : ndarray
1107 Probability density function evaluated at `X`
1109 Notes
1110 -----
1111 %(_matnorm_doc_callparams_note)s
1113 """
1114 return np.exp(self.logpdf(X, mean, rowcov, colcov))
1116 def rvs(self, mean=None, rowcov=1, colcov=1, size=1, random_state=None):
1117 """
1118 Draw random samples from a matrix normal distribution.
1120 Parameters
1121 ----------
1122 %(_matnorm_doc_default_callparams)s
1123 size : integer, optional
1124 Number of samples to draw (default 1).
1125 %(_doc_random_state)s
1127 Returns
1128 -------
1129 rvs : ndarray or scalar
1130 Random variates of size (`size`, `dims`), where `dims` is the
1131 dimension of the random matrices.
1133 Notes
1134 -----
1135 %(_matnorm_doc_callparams_note)s
1137 """
1138 size = int(size)
1139 dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,
1140 colcov)
1141 rowchol = scipy.linalg.cholesky(rowcov, lower=True)
1142 colchol = scipy.linalg.cholesky(colcov, lower=True)
1143 random_state = self._get_random_state(random_state)
1144 std_norm = random_state.standard_normal(size=(dims[1], size, dims[0]))
1145 roll_rvs = np.tensordot(colchol, np.dot(std_norm, rowchol.T), 1)
1146 out = np.rollaxis(roll_rvs.T, axis=1, start=0) + mean[np.newaxis, :, :]
1147 if size == 1:
1148 out = out.reshape(mean.shape)
1149 return out
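# Added illustrative sketch: the sampler returns a single matrix for
# ``size=1`` and stacks samples along a new leading axis otherwise, e.g.
#
#     >>> import numpy as np
#     >>> from scipy.stats import matrix_normal
#     >>> M = np.zeros((3, 2))
#     >>> matrix_normal.rvs(mean=M, random_state=0).shape
#     (3, 2)
#     >>> matrix_normal.rvs(mean=M, size=4, random_state=0).shape
#     (4, 3, 2)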
1152matrix_normal = matrix_normal_gen()
1155class matrix_normal_frozen(multi_rv_frozen):
1156 def __init__(self, mean=None, rowcov=1, colcov=1, seed=None):
1157 """
1158 Create a frozen matrix normal distribution.
1160 Parameters
1161 ----------
1162 %(_matnorm_doc_default_callparams)s
1163 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional
1164 This parameter defines the object to use for drawing random
1165 variates.
1166 If `seed` is `None` the `~np.random.RandomState` singleton is used.
1167 If `seed` is an int, a new ``RandomState`` instance is used, seeded
1168 with seed.
1169 If `seed` is already a ``RandomState`` or ``Generator`` instance,
1170 then that object is used.
1171 Default is None.
1173 Examples
1174 --------
1175 >>> from scipy.stats import matrix_normal
1177 >>> distn = matrix_normal(mean=np.zeros((3,3)))
1178 >>> X = distn.rvs(); X
1179 array([[-0.02976962, 0.93339138, -0.09663178],
1180 [ 0.67405524, 0.28250467, -0.93308929],
1181 [-0.31144782, 0.74535536, 1.30412916]])
1182 >>> distn.pdf(X)
1183 2.5160642368346784e-05
1184 >>> distn.logpdf(X)
1185 -10.590229595124615
1186 """
1187 self._dist = matrix_normal_gen(seed)
1188 self.dims, self.mean, self.rowcov, self.colcov = \
1189 self._dist._process_parameters(mean, rowcov, colcov)
1190 self.rowpsd = _PSD(self.rowcov, allow_singular=False)
1191 self.colpsd = _PSD(self.colcov, allow_singular=False)
1193 def logpdf(self, X):
1194 X = self._dist._process_quantiles(X, self.dims)
1195 out = self._dist._logpdf(self.dims, X, self.mean, self.rowpsd.U,
1196 self.rowpsd.log_pdet, self.colpsd.U,
1197 self.colpsd.log_pdet)
1198 return _squeeze_output(out)
1200 def pdf(self, X):
1201 return np.exp(self.logpdf(X))
1203 def rvs(self, size=1, random_state=None):
1204 return self._dist.rvs(self.mean, self.rowcov, self.colcov, size,
1205 random_state)
1208# Set frozen generator docstrings from corresponding docstrings in
1209# matrix_normal_gen and fill in default strings in class docstrings
1210for name in ['logpdf', 'pdf', 'rvs']:
1211 method = matrix_normal_gen.__dict__[name]
1212 method_frozen = matrix_normal_frozen.__dict__[name]
1213 method_frozen.__doc__ = doccer.docformat(method.__doc__,
1214 matnorm_docdict_noparams)
1215 method.__doc__ = doccer.docformat(method.__doc__, matnorm_docdict_params)
1217_dirichlet_doc_default_callparams = """\
1218alpha : array_like
1219 The concentration parameters. The number of entries determines the
1220 dimensionality of the distribution.
1221"""
1222_dirichlet_doc_frozen_callparams = ""
1224_dirichlet_doc_frozen_callparams_note = \
1225 """See class definition for a detailed description of parameters."""
1227dirichlet_docdict_params = {
1228 '_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams,
1229 '_doc_random_state': _doc_random_state
1230}
1232dirichlet_docdict_noparams = {
1233 '_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams,
1234 '_doc_random_state': _doc_random_state
1235}
1238def _dirichlet_check_parameters(alpha):
1239 alpha = np.asarray(alpha)
1240 if np.min(alpha) <= 0:
1241 raise ValueError("All parameters must be greater than 0")
1242 elif alpha.ndim != 1:
1243 raise ValueError("Parameter vector 'a' must be one dimensional, "
1244 "but a.shape = %s." % (alpha.shape, ))
1245 return alpha
1248def _dirichlet_check_input(alpha, x):
1249 x = np.asarray(x)
1251 if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]:
1252 raise ValueError("Vector 'x' must have either the same number "
1253 "of entries as, or one entry fewer than, "
1254 "parameter vector 'a', but alpha.shape = %s "
1255 "and x.shape = %s." % (alpha.shape, x.shape))
1257 if x.shape[0] != alpha.shape[0]:
1258 xk = np.array([1 - np.sum(x, 0)])
1259 if xk.ndim == 1:
1260 x = np.append(x, xk)
1261 elif xk.ndim == 2:
1262 x = np.vstack((x, xk))
1263 else:
1264 raise ValueError("The input must be one dimensional or a two "
1265 "dimensional matrix containing the entries.")
1267 if np.min(x) < 0:
1268 raise ValueError("Each entry in 'x' must be greater than or equal "
1269 "to zero.")
1271 if np.max(x) > 1:
1272 raise ValueError("Each entry in 'x' must be less than or equal to one.")
1274 # Check x_i > 0 or alpha_i > 1
1275 xeq0 = (x == 0)
1276 alphalt1 = (alpha < 1)
1277 if x.shape != alpha.shape:
1278 alphalt1 = np.repeat(alphalt1, x.shape[-1], axis=-1).reshape(x.shape)
1279 chk = np.logical_and(xeq0, alphalt1)
1281 if np.sum(chk):
1282 raise ValueError("Each entry in 'x' must be greater than zero if its "
1283 "alpha is less than one.")
1285 if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any():
1286 raise ValueError("The input vector 'x' must lie within the normal "
1287 "simplex. but np.sum(x, 0) = %s." % np.sum(x, 0))
1289 return x
1292def _lnB(alpha):
1293 r"""
1294 Internal helper function to compute the log of the useful quotient
1296 .. math::
1298 B(\alpha) = \frac{\prod_{i=1}^{K} \Gamma(\alpha_i)}
1299 {\Gamma\left(\sum_{i=1}^{K} \alpha_i \right)}
1301 Parameters
1302 ----------
1303 %(_dirichlet_doc_default_callparams)s
1305 Returns
1306 -------
1307 B : scalar
1308 Helper quotient, internal use only
1310 """
1311 return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha))
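# Hedged check (added): for a flat ``alpha`` of three ones the quotient is
# B(alpha) = Gamma(1)**3 / Gamma(3) = 1/2, so its log should be -log(2):
#
#     >>> import numpy as np
#     >>> from scipy.stats._multivariate import _lnB
#     >>> np.isclose(_lnB(np.array([1.0, 1.0, 1.0])), -np.log(2))
#     True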
1314class dirichlet_gen(multi_rv_generic):
1315 r"""
1316 A Dirichlet random variable.
1318 The `alpha` keyword specifies the concentration parameters of the
1319 distribution.
1321 .. versionadded:: 0.15.0
1323 Methods
1324 -------
1325 ``pdf(x, alpha)``
1326 Probability density function.
1327 ``logpdf(x, alpha)``
1328 Log of the probability density function.
1329 ``rvs(alpha, size=1, random_state=None)``
1330 Draw random samples from a Dirichlet distribution.
1331 ``mean(alpha)``
1332 The mean of the Dirichlet distribution
1333 ``var(alpha)``
1334 The variance of the Dirichlet distribution
1335 ``entropy(alpha)``
1336 Compute the differential entropy of the Dirichlet distribution.
1338 Parameters
1339 ----------
1340 x : array_like
1341 Quantiles, with the last axis of `x` denoting the components.
1342 %(_dirichlet_doc_default_callparams)s
1343 %(_doc_random_state)s
1345 Alternatively, the object may be called (as a function) to fix
1346 concentration parameters, returning a "frozen" Dirichlet
1347 random variable:
1349 rv = dirichlet(alpha)
1350 - Frozen object with the same methods but holding the given
1351 concentration parameters fixed.
1353 Notes
1354 -----
1355 Each :math:`\alpha` entry must be positive. The distribution is
1356 supported only on the simplex defined by
1358 .. math::
1359 \sum_{i=1}^{K} x_i \le 1
1362 The probability density function for `dirichlet` is
1364 .. math::
1366 f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1}
1368 where
1370 .. math::
1372 \mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)}
1373 {\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)}
1375 and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)` is the vector of
1376 concentration parameters, and :math:`K` is the dimension of the space
1377 where :math:`x` takes values.
1379 Note that the dirichlet interface is somewhat inconsistent.
1380 The array returned by the rvs function is transposed
1381 with respect to the format expected by the pdf and logpdf.
1383 Examples
1384 --------
1385 >>> from scipy.stats import dirichlet
1387 Generate a dirichlet random variable
1389 >>> quantiles = np.array([0.2, 0.2, 0.6]) # specify quantiles
1390 >>> alpha = np.array([0.4, 5, 15]) # specify concentration parameters
1391 >>> dirichlet.pdf(quantiles, alpha)
1392 0.2843831684937255
1394 The same PDF but following a log scale
1396 >>> dirichlet.logpdf(quantiles, alpha)
1397 -1.2574327653159187
1399 Once we specify the dirichlet distribution
1400 we can then calculate quantities of interest
1402 >>> dirichlet.mean(alpha) # get the mean of the distribution
1403 array([0.01960784, 0.24509804, 0.73529412])
1404 >>> dirichlet.var(alpha) # get variance
1405 array([0.00089829, 0.00864603, 0.00909517])
1406 >>> dirichlet.entropy(alpha) # calculate the differential entropy
1407 -4.3280162474082715
1409 We can also return random samples from the distribution
1411 >>> dirichlet.rvs(alpha, size=1, random_state=1)
1412 array([[0.00766178, 0.24670518, 0.74563305]])
1413 >>> dirichlet.rvs(alpha, size=2, random_state=2)
1414 array([[0.01639427, 0.1292273 , 0.85437844],
1415 [0.00156917, 0.19033695, 0.80809388]])
1417 """
1419 def __init__(self, seed=None):
1420 super(dirichlet_gen, self).__init__(seed)
1421 self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params)
1423 def __call__(self, alpha, seed=None):
1424 return dirichlet_frozen(alpha, seed=seed)
1426 def _logpdf(self, x, alpha):
1427 """
1428 Parameters
1429 ----------
1430 x : ndarray
1431 Points at which to evaluate the log of the probability
1432 density function
1433 %(_dirichlet_doc_default_callparams)s
1435 Notes
1436 -----
1437 As this function does no argument checking, it should not be
1438 called directly; use 'logpdf' instead.
1440 """
1441 lnB = _lnB(alpha)
1442 return - lnB + np.sum((xlogy(alpha - 1, x.T)).T, 0)
1444 def logpdf(self, x, alpha):
1445 """
1446 Log of the Dirichlet probability density function.
1448 Parameters
1449 ----------
1450 x : array_like
1451 Quantiles, with the last axis of `x` denoting the components.
1452 %(_dirichlet_doc_default_callparams)s
1454 Returns
1455 -------
1456 pdf : ndarray or scalar
1457 Log of the probability density function evaluated at `x`.
1459 """
1460 alpha = _dirichlet_check_parameters(alpha)
1461 x = _dirichlet_check_input(alpha, x)
1463 out = self._logpdf(x, alpha)
1464 return _squeeze_output(out)
1466 def pdf(self, x, alpha):
1467 """
1468 The Dirichlet probability density function.
1470 Parameters
1471 ----------
1472 x : array_like
1473 Quantiles, with the last axis of `x` denoting the components.
1474 %(_dirichlet_doc_default_callparams)s
1476 Returns
1477 -------
1478 pdf : ndarray or scalar
1479 The probability density function evaluated at `x`.
1481 """
1482 alpha = _dirichlet_check_parameters(alpha)
1483 x = _dirichlet_check_input(alpha, x)
1485 out = np.exp(self._logpdf(x, alpha))
1486 return _squeeze_output(out)
1488 def mean(self, alpha):
1489 """
1490 Compute the mean of the dirichlet distribution.
1492 Parameters
1493 ----------
1494 %(_dirichlet_doc_default_callparams)s
1496 Returns
1497 -------
1498 mu : ndarray or scalar
1499 Mean of the Dirichlet distribution.
1501 """
1502 alpha = _dirichlet_check_parameters(alpha)
1504 out = alpha / (np.sum(alpha))
1505 return _squeeze_output(out)
1507 def var(self, alpha):
1508 """
1509 Compute the variance of the dirichlet distribution.
1511 Parameters
1512 ----------
1513 %(_dirichlet_doc_default_callparams)s
1515 Returns
1516 -------
1517 v : ndarray or scalar
1518 Variance of the Dirichlet distribution.
1520 """
1522 alpha = _dirichlet_check_parameters(alpha)
1524 alpha0 = np.sum(alpha)
1525 out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1))
1526 return _squeeze_output(out)
1528 def entropy(self, alpha):
1529 """
1530 Compute the differential entropy of the dirichlet distribution.
1532 Parameters
1533 ----------
1534 %(_dirichlet_doc_default_callparams)s
1536 Returns
1537 -------
1538 h : scalar
1539 Entropy of the Dirichlet distribution
1541 """
1543 alpha = _dirichlet_check_parameters(alpha)
1545 alpha0 = np.sum(alpha)
1546 lnB = _lnB(alpha)
1547 K = alpha.shape[0]
1549 out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum(
1550 (alpha - 1) * scipy.special.psi(alpha))
1551 return _squeeze_output(out)
1553 def rvs(self, alpha, size=1, random_state=None):
1554 """
1555 Draw random samples from a Dirichlet distribution.
1557 Parameters
1558 ----------
1559 %(_dirichlet_doc_default_callparams)s
1560 size : int, optional
1561 Number of samples to draw (default 1).
1562 %(_doc_random_state)s
1564 Returns
1565 -------
1566 rvs : ndarray or scalar
1567 Random variates of size (`size`, `N`), where `N` is the
1568 dimension of the random variable.
1570 """
1571 alpha = _dirichlet_check_parameters(alpha)
1572 random_state = self._get_random_state(random_state)
1573 return random_state.dirichlet(alpha, size=size)
1576dirichlet = dirichlet_gen()
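# Hedged example (added): with ``alpha = [1, 1, 1]`` the Dirichlet density is
# uniform on the simplex, with constant value 1/B(alpha) = Gamma(3) = 2:
#
#     >>> import numpy as np
#     >>> from scipy.stats import dirichlet
#     >>> np.isclose(dirichlet.pdf([0.2, 0.3, 0.5], [1.0, 1.0, 1.0]), 2.0)
#     True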
1579class dirichlet_frozen(multi_rv_frozen):
1580 def __init__(self, alpha, seed=None):
1581 self.alpha = _dirichlet_check_parameters(alpha)
1582 self._dist = dirichlet_gen(seed)
1584 def logpdf(self, x):
1585 return self._dist.logpdf(x, self.alpha)
1587 def pdf(self, x):
1588 return self._dist.pdf(x, self.alpha)
1590 def mean(self):
1591 return self._dist.mean(self.alpha)
1593 def var(self):
1594 return self._dist.var(self.alpha)
1596 def entropy(self):
1597 return self._dist.entropy(self.alpha)
1599 def rvs(self, size=1, random_state=None):
1600 return self._dist.rvs(self.alpha, size, random_state)
1603# Set frozen generator docstrings from corresponding docstrings in
1604# multivariate_normal_gen and fill in default strings in class docstrings
1605for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'entropy']:
1606 method = dirichlet_gen.__dict__[name]
1607 method_frozen = dirichlet_frozen.__dict__[name]
1608 method_frozen.__doc__ = doccer.docformat(
1609 method.__doc__, dirichlet_docdict_noparams)
1610 method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params)
1613_wishart_doc_default_callparams = """\
1614df : int
1615 Degrees of freedom, must be greater than or equal to dimension of the
1616 scale matrix
1617scale : array_like
1618 Symmetric positive definite scale matrix of the distribution
1619"""
1621_wishart_doc_callparams_note = ""
1623_wishart_doc_frozen_callparams = ""
1625_wishart_doc_frozen_callparams_note = \
1626 """See class definition for a detailed description of parameters."""
1628wishart_docdict_params = {
1629 '_doc_default_callparams': _wishart_doc_default_callparams,
1630 '_doc_callparams_note': _wishart_doc_callparams_note,
1631 '_doc_random_state': _doc_random_state
1632}
1634wishart_docdict_noparams = {
1635 '_doc_default_callparams': _wishart_doc_frozen_callparams,
1636 '_doc_callparams_note': _wishart_doc_frozen_callparams_note,
1637 '_doc_random_state': _doc_random_state
1638}
1641class wishart_gen(multi_rv_generic):
1642 r"""
1643 A Wishart random variable.
1645 The `df` keyword specifies the degrees of freedom. The `scale` keyword
1646 specifies the scale matrix, which must be symmetric and positive definite.
1647 In this context, the scale matrix is often interpreted in terms of a
1648 multivariate normal precision matrix (the inverse of the covariance
1649 matrix).
1651 Methods
1652 -------
1653 ``pdf(x, df, scale)``
1654 Probability density function.
1655 ``logpdf(x, df, scale)``
1656 Log of the probability density function.
1657 ``rvs(df, scale, size=1, random_state=None)``
1658 Draw random samples from a Wishart distribution.
1659 ``entropy()``
1660 Compute the differential entropy of the Wishart distribution.
1662 Parameters
1663 ----------
1664 x : array_like
1665 Quantiles, with the last axis of `x` denoting the components.
1666 %(_doc_default_callparams)s
1667 %(_doc_random_state)s
1669 Alternatively, the object may be called (as a function) to fix the degrees
1670 of freedom and scale parameters, returning a "frozen" Wishart random
1671 variable:
1673 rv = wishart(df=1, scale=1)
1674 - Frozen object with the same methods but holding the given
1675 degrees of freedom and scale fixed.
1677 See Also
1678 --------
1679 invwishart, chi2
1681 Notes
1682 -----
1683 %(_doc_callparams_note)s
1685 The scale matrix `scale` must be a symmetric positive definite
1686 matrix. Singular matrices, including the symmetric positive semi-definite
1687 case, are not supported.
1689 The Wishart distribution is often denoted
1691 .. math::
1693 W_p(\nu, \Sigma)
1695 where :math:`\nu` is the degrees of freedom and :math:`\Sigma` is the
1696 :math:`p \times p` scale matrix.
1698 The probability density function for `wishart` has support over positive
1699 definite matrices :math:`S`; if :math:`S \sim W_p(\nu, \Sigma)`, then
1700 its PDF is given by:
1702 .. math::
1704 f(S) = \frac{|S|^{\frac{\nu - p - 1}{2}}}{2^{ \frac{\nu p}{2} }
1705 |\Sigma|^\frac{\nu}{2} \Gamma_p \left ( \frac{\nu}{2} \right )}
1706 \exp\left( -tr(\Sigma^{-1} S) / 2 \right)
1708 If :math:`S \sim W_p(\nu, \Sigma)` (Wishart) then
1709 :math:`S^{-1} \sim W_p^{-1}(\nu, \Sigma^{-1})` (inverse Wishart).
1711 If the scale matrix is 1-dimensional and equal to one, then the Wishart
1712 distribution :math:`W_1(\nu, 1)` collapses to the :math:`\chi^2(\nu)`
1713 distribution.
1715 .. versionadded:: 0.16.0
1717 References
1718 ----------
1719 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",
1720 Wiley, 1983.
1721 .. [2] W.B. Smith and R.R. Hocking, "Algorithm AS 53: Wishart Variate
1722 Generator", Applied Statistics, vol. 21, pp. 341-345, 1972.
1724 Examples
1725 --------
1726 >>> import matplotlib.pyplot as plt
1727 >>> from scipy.stats import wishart, chi2
1728 >>> x = np.linspace(1e-5, 8, 100)
1729 >>> w = wishart.pdf(x, df=3, scale=1); w[:5]
1730 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])
1731 >>> c = chi2.pdf(x, 3); c[:5]
1732 array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])
1733 >>> plt.plot(x, w)
1735 The input quantiles can be any shape of array, as long as the last
1736 axis labels the components.
1738 """
1740 def __init__(self, seed=None):
1741 super(wishart_gen, self).__init__(seed)
1742 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)
1744 def __call__(self, df=None, scale=None, seed=None):
1745 """
1746 Create a frozen Wishart distribution.
1748 See `wishart_frozen` for more information.
1750 """
1751 return wishart_frozen(df, scale, seed)
1753 def _process_parameters(self, df, scale):
1754 if scale is None:
1755 scale = 1.0
1756 scale = np.asarray(scale, dtype=float)
1758 if scale.ndim == 0:
1759 scale = scale[np.newaxis, np.newaxis]
1760 elif scale.ndim == 1:
1761 scale = np.diag(scale)
1762 elif scale.ndim == 2 and not scale.shape[0] == scale.shape[1]:
1763 raise ValueError("Array 'scale' must be square if it is two"
1764 " dimensional, but scale.scale = %s."
1765 % str(scale.shape))
1766 elif scale.ndim > 2:
1767 raise ValueError("Array 'scale' must be at most two-dimensional,"
1768 " but scale.ndim = %d" % scale.ndim)
1770 dim = scale.shape[0]
1772 if df is None:
1773 df = dim
1774 elif not np.isscalar(df):
1775 raise ValueError("Degrees of freedom must be a scalar.")
1776 elif df < dim:
1777 raise ValueError("Degrees of freedom cannot be less than dimension"
1778 " of scale matrix, but df = %d" % df)
1780 return dim, df, scale
1782 def _process_quantiles(self, x, dim):
1783 """
1784 Adjust quantiles array so that last axis labels the components of
1785 each data point.
1786 """
1787 x = np.asarray(x, dtype=float)
1789 if x.ndim == 0:
1790 x = x * np.eye(dim)[:, :, np.newaxis]
1791 if x.ndim == 1:
1792 if dim == 1:
1793 x = x[np.newaxis, np.newaxis, :]
1794 else:
1795 x = np.diag(x)[:, :, np.newaxis]
1796 elif x.ndim == 2:
1797 if not x.shape[0] == x.shape[1]:
1798 raise ValueError("Quantiles must be square if they are two"
1799 " dimensional, but x.shape = %s."
1800 % str(x.shape))
1801 x = x[:, :, np.newaxis]
1802 elif x.ndim == 3:
1803 if not x.shape[0] == x.shape[1]:
1804 raise ValueError("Quantiles must be square in the first two"
1805 " dimensions if they are three dimensional"
1806 ", but x.shape = %s." % str(x.shape))
1807 elif x.ndim > 3:
1808 raise ValueError("Quantiles must be at most two-dimensional with"
1809 " an additional dimension for multiple"
1810 "components, but x.ndim = %d" % x.ndim)
1812 # Now we have 3-dim array; should have shape [dim, dim, *]
1813 if not x.shape[0:2] == (dim, dim):
1814 raise ValueError('Quantiles have incompatible dimensions: should'
1815 ' be %s, got %s.' % ((dim, dim), x.shape[0:2]))
1817 return x
1819 def _process_size(self, size):
1820 size = np.asarray(size)
1822 if size.ndim == 0:
1823 size = size[np.newaxis]
1824 elif size.ndim > 1:
1825 raise ValueError('Size must be an integer or tuple of integers;'
1826 ' thus must have dimension <= 1.'
1827 ' Got size = %s' % str(tuple(size)))
1828 n = size.prod()
1829 shape = tuple(size)
1831 return n, shape
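# Added illustrative sketch (uses the private ``_process_size`` helper, shown
# only to document its behaviour): ``size`` may be an int or a tuple; the
# helper returns the total draw count and the requested output shape, e.g.
#
#     >>> from scipy.stats import wishart
#     >>> n, shape = wishart._process_size(5)
#     >>> int(n), shape
#     (5, (5,))
#     >>> n, shape = wishart._process_size((2, 3))
#     >>> int(n), shape
#     (6, (2, 3))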
1833 def _logpdf(self, x, dim, df, scale, log_det_scale, C):
1834 """
1835 Parameters
1836 ----------
1837 x : ndarray
1838 Points at which to evaluate the log of the probability
1839 density function
1840 dim : int
1841 Dimension of the scale matrix
1842 df : int
1843 Degrees of freedom
1844 scale : ndarray
1845 Scale matrix
1846 log_det_scale : float
1847 Logarithm of the determinant of the scale matrix
1848 C : ndarray
1849 Cholesky factorization of the scale matrix, lower triangular.
1851 Notes
1852 -----
1853 As this function does no argument checking, it should not be
1854 called directly; use 'logpdf' instead.
1856 """
1857 # log determinant of x
1858 # Note: x has components along the last axis, so that x.T has
1859 # components along the 0-th axis. Then since det(A) = det(A'), this
1860 # gives us a 1-dim vector of determinants
1862 # Retrieve tr(scale^{-1} x)
1863 log_det_x = np.zeros(x.shape[-1])
1864 scale_inv_x = np.zeros(x.shape)
1865 tr_scale_inv_x = np.zeros(x.shape[-1])
1866 for i in range(x.shape[-1]):
1867 _, log_det_x[i] = self._cholesky_logdet(x[:, :, i])
1868 scale_inv_x[:, :, i] = scipy.linalg.cho_solve((C, True), x[:, :, i])
1869 tr_scale_inv_x[i] = scale_inv_x[:, :, i].trace()
1871 # Log PDF
1872 out = ((0.5 * (df - dim - 1) * log_det_x - 0.5 * tr_scale_inv_x) -
1873 (0.5 * df * dim * _LOG_2 + 0.5 * df * log_det_scale +
1874 multigammaln(0.5*df, dim)))
1876 return out
1878 def logpdf(self, x, df, scale):
1879 """
1880 Log of the Wishart probability density function.
1882 Parameters
1883 ----------
1884 x : array_like
1885 Quantiles, with the last axis of `x` denoting the components.
1886 Each quantile must be a symmetric positive definite matrix.
1887 %(_doc_default_callparams)s
1889 Returns
1890 -------
1891 logpdf : ndarray
1892 Log of the probability density function evaluated at `x`
1894 Notes
1895 -----
1896 %(_doc_callparams_note)s
1898 """
1899 dim, df, scale = self._process_parameters(df, scale)
1900 x = self._process_quantiles(x, dim)
1902 # Cholesky decomposition of scale, get log(det(scale))
1903 C, log_det_scale = self._cholesky_logdet(scale)
1905 out = self._logpdf(x, dim, df, scale, log_det_scale, C)
1906 return _squeeze_output(out)
1908 def pdf(self, x, df, scale):
1909 """
1910 Wishart probability density function.
1912 Parameters
1913 ----------
1914 x : array_like
1915 Quantiles, with the last axis of `x` denoting the components.
1916 Each quantile must be a symmetric positive definite matrix.
1917 %(_doc_default_callparams)s
1919 Returns
1920 -------
1921 pdf : ndarray
1922 Probability density function evaluated at `x`
1924 Notes
1925 -----
1926 %(_doc_callparams_note)s
1928 """
1929 return np.exp(self.logpdf(x, df, scale))
1931 def _mean(self, dim, df, scale):
1932 """
1933 Parameters
1934 ----------
1935 dim : int
1936 Dimension of the scale matrix
1937 %(_doc_default_callparams)s
1939 Notes
1940 -----
1941 As this function does no argument checking, it should not be
1942 called directly; use 'mean' instead.
1944 """
1945 return df * scale
1947 def mean(self, df, scale):
1948 """
1949 Mean of the Wishart distribution
1951 Parameters
1952 ----------
1953 %(_doc_default_callparams)s
1955 Returns
1956 -------
1957 mean : float
1958 The mean of the distribution
1959 """
1960 dim, df, scale = self._process_parameters(df, scale)
1961 out = self._mean(dim, df, scale)
1962 return _squeeze_output(out)
1964 def _mode(self, dim, df, scale):
1965 """
1966 Parameters
1967 ----------
1968 dim : int
1969 Dimension of the scale matrix
1970 %(_doc_default_callparams)s
1972 Notes
1973 -----
1974 As this function does no argument checking, it should not be
1975 called directly; use 'mode' instead.
1977 """
1978 if df >= dim + 1:
1979 out = (df-dim-1) * scale
1980 else:
1981 out = None
1982 return out
1984 def mode(self, df, scale):
1985 """
1986 Mode of the Wishart distribution
1988 Only valid if the degrees of freedom exceed the dimension of the
1989 scale matrix by at least one, i.e. ``df >= dim + 1``.
1991 Parameters
1992 ----------
1993 %(_doc_default_callparams)s
1995 Returns
1996 -------
1997 mode : float or None
1998 The mode of the distribution
1999 """
2000 dim, df, scale = self._process_parameters(df, scale)
2001 out = self._mode(dim, df, scale)
2002 return _squeeze_output(out) if out is not None else out
2004 def _var(self, dim, df, scale):
2005 """
2006 Parameters
2007 ----------
2008 dim : int
2009 Dimension of the scale matrix
2010 %(_doc_default_callparams)s
2012 Notes
2013 -----
2014 As this function does no argument checking, it should not be
2015 called directly; use 'var' instead.
2017 """
2018 var = scale**2
2019 diag = scale.diagonal() # 1 x dim array
2020 var += np.outer(diag, diag)
2021 var *= df
2022 return var
2024 def var(self, df, scale):
2025 """
2026 Variance of the Wishart distribution
2028 Parameters
2029 ----------
2030 %(_doc_default_callparams)s
2032 Returns
2033 -------
2034 var : float
2035 The variance of the distribution
2036 """
2037 dim, df, scale = self._process_parameters(df, scale)
2038 out = self._var(dim, df, scale)
2039 return _squeeze_output(out)
2041 def _standard_rvs(self, n, shape, dim, df, random_state):
2042 """
2043 Parameters
2044 ----------
2045 n : integer
2046 Number of variates to generate
2047 shape : iterable
2048 Shape of the variates to generate
2049 dim : int
2050 Dimension of the scale matrix
2051 df : int
2052 Degrees of freedom
2053 random_state : {`~np.random.RandomState`, `~np.random.Generator`}
2054 Object used for drawing the random variates.
2056 Notes
2057 -----
2058 As this function does no argument checking, it should not be
2059 called directly; use 'rvs' instead.
2061 """
2062 # Random normal variates for off-diagonal elements
2063 n_tril = dim * (dim-1) // 2
2064 covariances = random_state.normal(
2065 size=n*n_tril).reshape(shape+(n_tril,))
2067 # Random chi-square variates for diagonal elements
2068 variances = (np.r_[[random_state.chisquare(df-(i+1)+1, size=n)**0.5
2069 for i in range(dim)]].reshape((dim,) +
2070 shape[::-1]).T)
2072 # Create the A matri(ces) - lower triangular
2073 A = np.zeros(shape + (dim, dim))
2075 # Input the covariances
2076 size_idx = tuple([slice(None, None, None)]*len(shape))
2077 tril_idx = np.tril_indices(dim, k=-1)
2078 A[size_idx + tril_idx] = covariances
2080 # Input the variances
2081 diag_idx = np.diag_indices(dim)
2082 A[size_idx + diag_idx] = variances
2084 return A
2086 def _rvs(self, n, shape, dim, df, C, random_state):
2087 """
2088 Parameters
2089 ----------
2090 n : integer
2091 Number of variates to generate
2092 shape : iterable
2093 Shape of the variates to generate
2094 dim : int
2095 Dimension of the scale matrix
2096 df : int
2097 Degrees of freedom
2098 scale : ndarray
2099 Scale matrix
2100 C : ndarray
2101 Cholesky factorization of the scale matrix, lower triangular.
2102 %(_doc_random_state)s
2104 Notes
2105 -----
2106 As this function does no argument checking, it should not be
2107 called directly; use 'rvs' instead.
2109 """
2110 random_state = self._get_random_state(random_state)
2111 # Calculate the matrices A, which are actually lower triangular
2112 # Cholesky factorizations of a matrix B such that B ~ W(df, I)
2113 A = self._standard_rvs(n, shape, dim, df, random_state)
2115 # Calculate SA = C A A' C', where SA ~ W(df, scale)
2116 # Note: this is the product of a (lower) (lower) (lower)' (lower)'
2117 # or, denoting B = AA', it is C B C' where C is the lower
2118 # triangular Cholesky factorization of the scale matrix.
2119 # this appears to conflict with the instructions in [1]_, which
2120 # suggest that it should be D' B D where D is the lower
2121 # triangular factorization of the scale matrix. However, it is
2122 # meant to refer to the Bartlett (1933) representation of a
2123 # Wishart random variate as L A A' L' where L is lower triangular
2124 # so it appears that reading D' as upper triangular is either a typo
2125 # in [1]_ or a misreading of it.
2126 for index in np.ndindex(shape):
2127 CA = np.dot(C, A[index])
2128 A[index] = np.dot(CA, CA.T)
2130 return A
2132 def rvs(self, df, scale, size=1, random_state=None):
2133 """
2134 Draw random samples from a Wishart distribution.
2136 Parameters
2137 ----------
2138 %(_doc_default_callparams)s
2139 size : integer or iterable of integers, optional
2140 Number of samples to draw (default 1).
2141 %(_doc_random_state)s
2143 Returns
2144 -------
2145 rvs : ndarray
2146 Random variates of shape (`size`) + (`dim`, `dim`), where `dim` is
2147 the dimension of the scale matrix.
2149 Notes
2150 -----
2151 %(_doc_callparams_note)s
2153 """
2154 n, shape = self._process_size(size)
2155 dim, df, scale = self._process_parameters(df, scale)
2157 # Cholesky decomposition of scale
2158 C = scipy.linalg.cholesky(scale, lower=True)
2160 out = self._rvs(n, shape, dim, df, C, random_state)
2162 return _squeeze_output(out)
2164 def _entropy(self, dim, df, log_det_scale):
2165 """
2166 Parameters
2167 ----------
2168 dim : int
2169 Dimension of the scale matrix
2170 df : int
2171 Degrees of freedom
2172 log_det_scale : float
2173 Logarithm of the determinant of the scale matrix
2175 Notes
2176 -----
2177 As this function does no argument checking, it should not be
2178 called directly; use 'entropy' instead.
2180 """
2181 return (
2182 0.5 * (dim+1) * log_det_scale +
2183 0.5 * dim * (dim+1) * _LOG_2 +
2184 multigammaln(0.5*df, dim) -
2185 0.5 * (df - dim - 1) * np.sum(
2186 [psi(0.5*(df + 1 - (i+1))) for i in range(dim)]
2187 ) +
2188 0.5 * df * dim
2189 )
2191 def entropy(self, df, scale):
2192 """
2193 Compute the differential entropy of the Wishart.
2195 Parameters
2196 ----------
2197 %(_doc_default_callparams)s
2199 Returns
2200 -------
2201 h : scalar
2202 Entropy of the Wishart distribution
2204 Notes
2205 -----
2206 %(_doc_callparams_note)s
2208 """
2209 dim, df, scale = self._process_parameters(df, scale)
2210 _, log_det_scale = self._cholesky_logdet(scale)
2211 return self._entropy(dim, df, log_det_scale)
2213 def _cholesky_logdet(self, scale):
2214 """
2215 Compute the Cholesky decomposition of `scale` and log(det(scale)).
2217 Parameters
2218 ----------
2219 scale : ndarray
2220 Scale matrix.
2222 Returns
2223 -------
2224 c_decomp : ndarray
2225 The Cholesky decomposition of `scale`.
2226 logdet : scalar
2227 The log of the determinant of `scale`.
2229 Notes
2230 -----
2231 This computation of ``logdet`` is equivalent to
2232 ``np.linalg.slogdet(scale)``. It is ~2x faster though.
2234 """
2235 c_decomp = scipy.linalg.cholesky(scale, lower=True)
2236 logdet = 2 * np.sum(np.log(c_decomp.diagonal()))
2237 return c_decomp, logdet
2240wishart = wishart_gen()
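# Editorial example (a minimal sketch, not part of the original module): how
# the `wishart` instance above is typically used. The df/scale values and the
# helper name `_example_wishart_usage` are illustrative only.
def _example_wishart_usage():
    scale = np.array([[2.0, 0.3], [0.3, 1.0]])
    rv = wishart(df=5, scale=scale)            # frozen Wishart distribution
    draws = rv.rvs(size=3, random_state=0)     # array of shape (3, 2, 2)
    logp = rv.logpdf(draws[0])                 # log density at one draw
    assert np.allclose(rv.mean(), 5 * scale)   # mean is df * scale
    return draws, logp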
2243class wishart_frozen(multi_rv_frozen):
2244 """
2245 Create a frozen Wishart distribution.
2247 Parameters
2248 ----------
2249 df : array_like
2250 Degrees of freedom of the distribution
2251 scale : array_like
2252 Scale matrix of the distribution
2253 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional
2254 This parameter defines the object to use for drawing random variates.
2255 If `seed` is `None` the `~np.random.RandomState` singleton is used.
2256 If `seed` is an int, a new ``RandomState`` instance is used, seeded
2257 with seed.
2258 If `seed` is already a ``RandomState`` or ``Generator`` instance,
2259 then that object is used.
2260 Default is None.
2262 """
2263 def __init__(self, df, scale, seed=None):
2264 self._dist = wishart_gen(seed)
2265 self.dim, self.df, self.scale = self._dist._process_parameters(
2266 df, scale)
2267 self.C, self.log_det_scale = self._dist._cholesky_logdet(self.scale)
2269 def logpdf(self, x):
2270 x = self._dist._process_quantiles(x, self.dim)
2272 out = self._dist._logpdf(x, self.dim, self.df, self.scale,
2273 self.log_det_scale, self.C)
2274 return _squeeze_output(out)
2276 def pdf(self, x):
2277 return np.exp(self.logpdf(x))
2279 def mean(self):
2280 out = self._dist._mean(self.dim, self.df, self.scale)
2281 return _squeeze_output(out)
2283 def mode(self):
2284 out = self._dist._mode(self.dim, self.df, self.scale)
2285 return _squeeze_output(out) if out is not None else out
2287 def var(self):
2288 out = self._dist._var(self.dim, self.df, self.scale)
2289 return _squeeze_output(out)
2291 def rvs(self, size=1, random_state=None):
2292 n, shape = self._dist._process_size(size)
2293 out = self._dist._rvs(n, shape, self.dim, self.df,
2294 self.C, random_state)
2295 return _squeeze_output(out)
2297 def entropy(self):
2298 return self._dist._entropy(self.dim, self.df, self.log_det_scale)
2301# Set frozen generator docstrings from corresponding docstrings in
2302# Wishart and fill in default strings in class docstrings
2303for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs', 'entropy']:
2304 method = wishart_gen.__dict__[name]
2305 method_frozen = wishart_frozen.__dict__[name]
2306 method_frozen.__doc__ = doccer.docformat(
2307 method.__doc__, wishart_docdict_noparams)
2308 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)
2311def _cho_inv_batch(a, check_finite=True):
2312 """
2313 Invert the matrices a_i using Cholesky factorizations, where each a_i
2314 occupies the last two dimensions of a and the remaining indices of a
2315 enumerate the index i.
2317 Overwrites the data in a.
2319 Parameters
2320 ----------
2321 a : array
2322 Array of matrices to invert, where the matrices themselves are stored
2323 in the last two dimensions.
2324 check_finite : bool, optional
2325 Whether to check that the input matrices contain only finite numbers.
2326 Disabling may give a performance gain, but may result in problems
2327 (crashes, non-termination) if the inputs do contain infinities or NaNs.
2329 Returns
2330 -------
2331 x : array
2332 Array of inverses of the matrices ``a_i``.
2334 See also
2335 --------
2336 scipy.linalg.cholesky : Cholesky factorization of a matrix
2338 """
2339 if check_finite:
2340 a1 = asarray_chkfinite(a)
2341 else:
2342 a1 = asarray(a)
2343 if len(a1.shape) < 2 or a1.shape[-2] != a1.shape[-1]:
2344 raise ValueError('expected square matrix in last two dimensions')
2346 potrf, potri = get_lapack_funcs(('potrf', 'potri'), (a1,))
2348 triu_rows, triu_cols = np.triu_indices(a.shape[-2], k=1)
2349 for index in np.ndindex(a1.shape[:-2]):
2351 # Cholesky decomposition
2352 a1[index], info = potrf(a1[index], lower=True, overwrite_a=False,
2353 clean=False)
2354 if info > 0:
2355 raise LinAlgError("%d-th leading minor not positive definite"
2356 % info)
2357 if info < 0:
2358 raise ValueError('illegal value in %d-th argument of internal'
2359 ' potrf' % -info)
2360 # Inversion
2361 a1[index], info = potri(a1[index], lower=True, overwrite_c=False)
2362 if info > 0:
2363 raise LinAlgError("the inverse could not be computed")
2364 if info < 0:
2365 raise ValueError('illegal value in %d-th argument of internal'
2366 ' potri' % -info)
2368 # Make symmetric (dpotri only fills in the lower triangle)
2369 a1[index][triu_rows, triu_cols] = a1[index][triu_cols, triu_rows]
2371 return a1
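# Editorial example (a minimal sketch, not part of the original module):
# `_cho_inv_batch` on a stack of SPD matrices should agree with
# `np.linalg.inv`. Note that it works in-place, so a copy is passed; the
# helper name `_example_cho_inv_batch` is illustrative only.
def _example_cho_inv_batch():
    rng = np.random.RandomState(0)
    b = rng.normal(size=(4, 3, 3))
    spd = np.matmul(b, b.transpose(0, 2, 1)) + 3 * np.eye(3)  # SPD batch
    inv = _cho_inv_batch(spd.copy())                # batched Cholesky inverse
    assert np.allclose(inv, np.linalg.inv(spd))
    return inv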
2374class invwishart_gen(wishart_gen):
2375 r"""
2376 An inverse Wishart random variable.
2378 The `df` keyword specifies the degrees of freedom. The `scale` keyword
2379 specifies the scale matrix, which must be symmetric and positive definite.
2380 In this context, the scale matrix is often interpreted in terms of a
2381 multivariate normal covariance matrix.
2383 Methods
2384 -------
2385 ``pdf(x, df, scale)``
2386 Probability density function.
2387 ``logpdf(x, df, scale)``
2388 Log of the probability density function.
2389 ``rvs(df, scale, size=1, random_state=None)``
2390 Draw random samples from an inverse Wishart distribution.
2392 Parameters
2393 ----------
2394 x : array_like
2395 Quantiles, with the last axis of `x` denoting the components.
2396 %(_doc_default_callparams)s
2397 %(_doc_random_state)s
2399 Alternatively, the object may be called (as a function) to fix the degrees
2400 of freedom and scale parameters, returning a "frozen" inverse Wishart
2401 random variable:
2403 rv = invwishart(df=1, scale=1)
2404 - Frozen object with the same methods but holding the given
2405 degrees of freedom and scale fixed.
2407 See Also
2408 --------
2409 wishart
2411 Notes
2412 -----
2413 %(_doc_callparams_note)s
2415 The scale matrix `scale` must be a symmetric positive definite
2416 matrix. Singular matrices, including the symmetric positive semi-definite
2417 case, are not supported.
2419 The inverse Wishart distribution is often denoted
2421 .. math::
2423 W_p^{-1}(\nu, \Psi)
2425 where :math:`\nu` is the degrees of freedom and :math:`\Psi` is the
2426 :math:`p \times p` scale matrix.
2428 The probability density function for `invwishart` has support over positive
2429 definite matrices :math:`S`; if :math:`S \sim W^{-1}_p(\nu, \Sigma)`,
2430 then its PDF is given by:
2432 .. math::
2434 f(S) = \frac{|\Sigma|^\frac{\nu}{2}}{2^{ \frac{\nu p}{2} }
2435 |S|^{\frac{\nu + p + 1}{2}} \Gamma_p \left(\frac{\nu}{2} \right)}
2436 \exp\left( -tr(\Sigma S^{-1}) / 2 \right)
2438 If :math:`S \sim W_p^{-1}(\nu, \Psi)` (inverse Wishart) then
2439 :math:`S^{-1} \sim W_p(\nu, \Psi^{-1})` (Wishart).
2441 If the scale matrix is 1-dimensional and equal to one, then the inverse
2442 Wishart distribution :math:`W_1^{-1}(\nu, 1)` collapses to the
2443 inverse Gamma distribution with parameters shape = :math:`\frac{\nu}{2}`
2444 and scale = :math:`\frac{1}{2}`.
2446 .. versionadded:: 0.16.0
2448 References
2449 ----------
2450 .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",
2451 Wiley, 1983.
2452 .. [2] M.C. Jones, "Generating Inverse Wishart Matrices", Communications
2453 in Statistics - Simulation and Computation, vol. 14.2, pp.511-514,
2454 1985.
2456 Examples
2457 --------
2458 >>> import matplotlib.pyplot as plt
2459 >>> from scipy.stats import invwishart, invgamma
2460 >>> x = np.linspace(0.01, 1, 100)
2461 >>> iw = invwishart.pdf(x, df=6, scale=1)
2462 >>> iw[:3]
2463 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])
2464 >>> ig = invgamma.pdf(x, 6/2., scale=1./2)
2465 >>> ig[:3]
2466 array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])
2467 >>> plt.plot(x, iw)
2469 The input quantiles can be any shape of array, as long as the last
2470 axis labels the components.
2472 """
2474 def __init__(self, seed=None):
2475 super(invwishart_gen, self).__init__(seed)
2476 self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)
2478 def __call__(self, df=None, scale=None, seed=None):
2479 """
2480 Create a frozen inverse Wishart distribution.
2482 See `invwishart_frozen` for more information.
2484 """
2485 return invwishart_frozen(df, scale, seed)
2487 def _logpdf(self, x, dim, df, scale, log_det_scale):
2488 """
2489 Parameters
2490 ----------
2491 x : ndarray
2492 Points at which to evaluate the log of the probability
2493 density function.
2494 dim : int
2495 Dimension of the scale matrix
2496 df : int
2497 Degrees of freedom
2498 scale : ndarray
2499 Scale matrix
2500 log_det_scale : float
2501 Logarithm of the determinant of the scale matrix
2503 Notes
2504 -----
2505 As this function does no argument checking, it should not be
2506 called directly; use 'logpdf' instead.
2508 """
2509 log_det_x = np.zeros(x.shape[-1])
2510 x_inv = np.copy(x).T
2511 if dim > 1:
2512 _cho_inv_batch(x_inv) # works in-place
2513 else:
2514 x_inv = 1./x_inv
2515 tr_scale_x_inv = np.zeros(x.shape[-1])
2517 for i in range(x.shape[-1]):
2518 C, lower = scipy.linalg.cho_factor(x[:, :, i], lower=True)
2520 log_det_x[i] = 2 * np.sum(np.log(C.diagonal()))
2522 tr_scale_x_inv[i] = np.dot(scale, x_inv[i]).trace()
2524 # Log PDF
2525 out = ((0.5 * df * log_det_scale - 0.5 * tr_scale_x_inv) -
2526 (0.5 * df * dim * _LOG_2 + 0.5 * (df + dim + 1) * log_det_x) -
2527 multigammaln(0.5*df, dim))
2529 return out
2531 def logpdf(self, x, df, scale):
2532 """
2533 Log of the inverse Wishart probability density function.
2535 Parameters
2536 ----------
2537 x : array_like
2538 Quantiles, with the last axis of `x` denoting the components.
2539 Each quantile must be a symmetric positive definite matrix.
2540 %(_doc_default_callparams)s
2542 Returns
2543 -------
2544 logpdf : ndarray
2545 Log of the probability density function evaluated at `x`
2547 Notes
2548 -----
2549 %(_doc_callparams_note)s
2551 """
2552 dim, df, scale = self._process_parameters(df, scale)
2553 x = self._process_quantiles(x, dim)
2554 _, log_det_scale = self._cholesky_logdet(scale)
2555 out = self._logpdf(x, dim, df, scale, log_det_scale)
2556 return _squeeze_output(out)
2558 def pdf(self, x, df, scale):
2559 """
2560 Inverse Wishart probability density function.
2562 Parameters
2563 ----------
2564 x : array_like
2565 Quantiles, with the last axis of `x` denoting the components.
2566 Each quantile must be a symmetric positive definite matrix.
2568 %(_doc_default_callparams)s
2570 Returns
2571 -------
2572 pdf : ndarray
2573 Probability density function evaluated at `x`
2575 Notes
2576 -----
2577 %(_doc_callparams_note)s
2579 """
2580 return np.exp(self.logpdf(x, df, scale))
2582 def _mean(self, dim, df, scale):
2583 """
2584 Parameters
2585 ----------
2586 dim : int
2587 Dimension of the scale matrix
2588 %(_doc_default_callparams)s
2590 Notes
2591 -----
2592 As this function does no argument checking, it should not be
2593 called directly; use 'mean' instead.
2595 """
2596 if df > dim + 1:
2597 out = scale / (df - dim - 1)
2598 else:
2599 out = None
2600 return out
2602 def mean(self, df, scale):
2603 """
2604 Mean of the inverse Wishart distribution
2606 Only valid if the degrees of freedom are greater than the dimension of
2607 the scale matrix plus one.
2609 Parameters
2610 ----------
2611 %(_doc_default_callparams)s
2613 Returns
2614 -------
2615 mean : float or None
2616 The mean of the distribution
2618 """
2619 dim, df, scale = self._process_parameters(df, scale)
2620 out = self._mean(dim, df, scale)
2621 return _squeeze_output(out) if out is not None else out
2623 def _mode(self, dim, df, scale):
2624 """
2625 Parameters
2626 ----------
2627 dim : int
2628 Dimension of the scale matrix
2629 %(_doc_default_callparams)s
2631 Notes
2632 -----
2633 As this function does no argument checking, it should not be
2634 called directly; use 'mode' instead.
2636 """
2637 return scale / (df + dim + 1)
2639 def mode(self, df, scale):
2640 """
2641 Mode of the inverse Wishart distribution
2643 Parameters
2644 ----------
2645 %(_doc_default_callparams)s
2647 Returns
2648 -------
2649 mode : float
2650 The mode of the distribution
2652 """
2653 dim, df, scale = self._process_parameters(df, scale)
2654 out = self._mode(dim, df, scale)
2655 return _squeeze_output(out)
2657 def _var(self, dim, df, scale):
2658 """
2659 Parameters
2660 ----------
2661 dim : int
2662 Dimension of the scale matrix
2663 %(_doc_default_callparams)s
2665 Notes
2666 -----
2667 As this function does no argument checking, it should not be
2668 called directly; use 'var' instead.
2670 """
2671 if df > dim + 3:
2672 var = (df - dim + 1) * scale**2
2673 diag = scale.diagonal() # 1 x dim array
2674 var += (df - dim - 1) * np.outer(diag, diag)
2675 var /= (df - dim) * (df - dim - 1)**2 * (df - dim - 3)
2676 else:
2677 var = None
2678 return var
2680 def var(self, df, scale):
2681 """
2682 Variance of the inverse Wishart distribution
2684 Only valid if the degrees of freedom are greater than the dimension of
2685 the scale matrix plus three.
2687 Parameters
2688 ----------
2689 %(_doc_default_callparams)s
2691 Returns
2692 -------
2693 var : float
2694 The variance of the distribution
2695 """
2696 dim, df, scale = self._process_parameters(df, scale)
2697 out = self._var(dim, df, scale)
2698 return _squeeze_output(out) if out is not None else out
2700 def _rvs(self, n, shape, dim, df, C, random_state):
2701 """
2702 Parameters
2703 ----------
2704 n : integer
2705 Number of variates to generate
2706 shape : iterable
2707 Shape of the variates to generate
2708 dim : int
2709 Dimension of the scale matrix
2710 df : int
2711 Degrees of freedom
2712 C : ndarray
2713 Cholesky factorization of the inverse scale matrix, lower triangular.
2714 %(_doc_random_state)s
2716 Notes
2717 -----
2718 As this function does no argument checking, it should not be
2719 called directly; use 'rvs' instead.
2721 """
2722 random_state = self._get_random_state(random_state)
2723 # Get random draws A such that A ~ W(df, I)
2724 A = super(invwishart_gen, self)._standard_rvs(n, shape, dim,
2725 df, random_state)
2727 # Calculate SA = (CA)'^{-1} (CA)^{-1} ~ iW(df, scale)
2728 eye = np.eye(dim)
2729 trtrs = get_lapack_funcs(('trtrs'), (A,))
2731 for index in np.ndindex(A.shape[:-2]):
2732 # Calculate CA
2733 CA = np.dot(C, A[index])
2734 # Get (C A)^{-1} via triangular solver
2735 if dim > 1:
2736 CA, info = trtrs(CA, eye, lower=True)
2737 if info > 0:
2738 raise LinAlgError("Singular matrix.")
2739 if info < 0:
2740 raise ValueError('Illegal value in %d-th argument of'
2741 ' internal trtrs' % -info)
2742 else:
2743 CA = 1. / CA
2744 # Get SA
2745 A[index] = np.dot(CA.T, CA)
2747 return A
2749 def rvs(self, df, scale, size=1, random_state=None):
2750 """
2751 Draw random samples from an inverse Wishart distribution.
2753 Parameters
2754 ----------
2755 %(_doc_default_callparams)s
2756 size : integer or iterable of integers, optional
2757 Number of samples to draw (default 1).
2758 %(_doc_random_state)s
2760 Returns
2761 -------
2762 rvs : ndarray
2763 Random variates of shape (`size`) + (`dim`, `dim`), where `dim` is
2764 the dimension of the scale matrix.
2766 Notes
2767 -----
2768 %(_doc_callparams_note)s
2770 """
2771 n, shape = self._process_size(size)
2772 dim, df, scale = self._process_parameters(df, scale)
2774 # Invert the scale
2775 eye = np.eye(dim)
2776 L, lower = scipy.linalg.cho_factor(scale, lower=True)
2777 inv_scale = scipy.linalg.cho_solve((L, lower), eye)
2778 # Cholesky decomposition of inverted scale
2779 C = scipy.linalg.cholesky(inv_scale, lower=True)
2781 out = self._rvs(n, shape, dim, df, C, random_state)
2783 return _squeeze_output(out)
2785 def entropy(self):
2786 # Need to find reference for inverse Wishart entropy
2787 raise AttributeError
2790invwishart = invwishart_gen()
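# Editorial example (a minimal sketch, not part of the original module): the
# `invwishart` instance above in the 1-d case, checked against the
# inverse-gamma density as described in the class docstring; the parameter
# values and helper name are illustrative only.
def _example_invwishart_usage():
    from scipy.stats import invgamma
    x = np.linspace(0.1, 2.0, 5)
    iw = invwishart.pdf(x, df=6, scale=1)        # 1-d inverse Wishart density
    ig = invgamma.pdf(x, 6 / 2., scale=1. / 2)   # equivalent inverse gamma
    assert np.allclose(iw, ig)
    return invwishart.rvs(df=6, scale=np.eye(3), size=2, random_state=0)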
2793class invwishart_frozen(multi_rv_frozen):
2794 def __init__(self, df, scale, seed=None):
2795 """
2796 Create a frozen inverse Wishart distribution.
2798 Parameters
2799 ----------
2800 df : array_like
2801 Degrees of freedom of the distribution
2802 scale : array_like
2803 Scale matrix of the distribution
2804 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional
2805 This parameter defines the object to use for drawing random
2806 variates.
2807 If `seed` is `None` the `~np.random.RandomState` singleton is used.
2808 If `seed` is an int, a new ``RandomState`` instance is used, seeded
2809 with seed.
2810 If `seed` is already a ``RandomState`` or ``Generator`` instance,
2811 then that object is used.
2812 Default is None.
2814 """
2815 self._dist = invwishart_gen(seed)
2816 self.dim, self.df, self.scale = self._dist._process_parameters(
2817 df, scale
2818 )
2820 # Get the determinant via Cholesky factorization
2821 C, lower = scipy.linalg.cho_factor(self.scale, lower=True)
2822 self.log_det_scale = 2 * np.sum(np.log(C.diagonal()))
2824 # Get the inverse using the Cholesky factorization
2825 eye = np.eye(self.dim)
2826 self.inv_scale = scipy.linalg.cho_solve((C, lower), eye)
2828 # Get the Cholesky factorization of the inverse scale
2829 self.C = scipy.linalg.cholesky(self.inv_scale, lower=True)
2831 def logpdf(self, x):
2832 x = self._dist._process_quantiles(x, self.dim)
2833 out = self._dist._logpdf(x, self.dim, self.df, self.scale,
2834 self.log_det_scale)
2835 return _squeeze_output(out)
2837 def pdf(self, x):
2838 return np.exp(self.logpdf(x))
2840 def mean(self):
2841 out = self._dist._mean(self.dim, self.df, self.scale)
2842 return _squeeze_output(out) if out is not None else out
2844 def mode(self):
2845 out = self._dist._mode(self.dim, self.df, self.scale)
2846 return _squeeze_output(out)
2848 def var(self):
2849 out = self._dist._var(self.dim, self.df, self.scale)
2850 return _squeeze_output(out) if out is not None else out
2852 def rvs(self, size=1, random_state=None):
2853 n, shape = self._dist._process_size(size)
2855 out = self._dist._rvs(n, shape, self.dim, self.df,
2856 self.C, random_state)
2858 return _squeeze_output(out)
2860 def entropy(self):
2861 # Need to find reference for inverse Wishart entropy
2862 raise AttributeError
2865# Set frozen generator docstrings from corresponding docstrings in
2866# inverse Wishart and fill in default strings in class docstrings
2867for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs']:
2868 method = invwishart_gen.__dict__[name]
2869 method_frozen = invwishart_frozen.__dict__[name]
2870 method_frozen.__doc__ = doccer.docformat(
2871 method.__doc__, wishart_docdict_noparams)
2872 method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)
2874_multinomial_doc_default_callparams = """\
2875n : int
2876 Number of trials
2877p : array_like
2878 Probability of a trial falling into each category; should sum to 1
2879"""
2881_multinomial_doc_callparams_note = \
2882"""`n` should be a positive integer. Each element of `p` should be in the
2883interval :math:`[0,1]` and the elements should sum to 1. If they do not sum to
28841, the last element of the `p` array is not used and is replaced with the
2885remaining probability left over from the earlier elements.
2886"""
2888_multinomial_doc_frozen_callparams = ""
2890_multinomial_doc_frozen_callparams_note = \
2891 """See class definition for a detailed description of parameters."""
2893multinomial_docdict_params = {
2894 '_doc_default_callparams': _multinomial_doc_default_callparams,
2895 '_doc_callparams_note': _multinomial_doc_callparams_note,
2896 '_doc_random_state': _doc_random_state
2897}
2899multinomial_docdict_noparams = {
2900 '_doc_default_callparams': _multinomial_doc_frozen_callparams,
2901 '_doc_callparams_note': _multinomial_doc_frozen_callparams_note,
2902 '_doc_random_state': _doc_random_state
2903}
2906class multinomial_gen(multi_rv_generic):
2907 r"""
2908 A multinomial random variable.
2910 Methods
2911 -------
2912 ``pmf(x, n, p)``
2913 Probability mass function.
2914 ``logpmf(x, n, p)``
2915 Log of the probability mass function.
2916 ``rvs(n, p, size=1, random_state=None)``
2917 Draw random samples from a multinomial distribution.
2918 ``entropy(n, p)``
2919 Compute the entropy of the multinomial distribution.
2920 ``cov(n, p)``
2921 Compute the covariance matrix of the multinomial distribution.
2923 Parameters
2924 ----------
2925 x : array_like
2926 Quantiles, with the last axis of `x` denoting the components.
2927 %(_doc_default_callparams)s
2928 %(_doc_random_state)s
2930 Notes
2931 -----
2932 %(_doc_callparams_note)s
2934 Alternatively, the object may be called (as a function) to fix the `n` and
2935 `p` parameters, returning a "frozen" multinomial random variable.
2937 The probability mass function for `multinomial` is
2939 .. math::
2941 f(x) = \frac{n!}{x_1! \cdots x_k!} p_1^{x_1} \cdots p_k^{x_k},
2943 supported on :math:`x=(x_1, \ldots, x_k)` where each :math:`x_i` is a
2944 nonnegative integer and their sum is :math:`n`.
2946 .. versionadded:: 0.19.0
2948 Examples
2949 --------
2951 >>> from scipy.stats import multinomial
2952 >>> rv = multinomial(8, [0.3, 0.2, 0.5])
2953 >>> rv.pmf([1, 3, 4])
2954 0.042000000000000072
2956 The multinomial distribution for :math:`k=2` is identical to the
2957 corresponding binomial distribution (tiny numerical differences
2958 notwithstanding):
2960 >>> from scipy.stats import binom
2961 >>> multinomial.pmf([3, 4], n=7, p=[0.4, 0.6])
2962 0.29030399999999973
2963 >>> binom.pmf(3, 7, 0.4)
2964 0.29030400000000012
2966 The functions ``pmf``, ``logpmf``, ``entropy``, and ``cov`` support
2967 broadcasting, under the convention that the vector parameters (``x`` and
2968 ``p``) are interpreted as if each row along the last axis is a single
2969 object. For instance:
2971 >>> multinomial.pmf([[3, 4], [3, 5]], n=[7, 8], p=[.3, .7])
2972 array([0.2268945, 0.25412184])
2974 Here, ``x.shape == (2, 2)``, ``n.shape == (2,)``, and ``p.shape == (2,)``,
2975 but following the rules mentioned above they behave as if the rows
2976 ``[3, 4]`` and ``[3, 5]`` in ``x`` and ``[.3, .7]`` in ``p`` were a single
2977 object, and as if we had ``x.shape = (2,)``, ``n.shape = (2,)``, and
2978 ``p.shape = ()``. To obtain the individual elements without broadcasting,
2979 we would do this:
2981 >>> multinomial.pmf([3, 4], n=7, p=[.3, .7])
2982 0.2268945
2983 >>> multinomial.pmf([3, 5], 8, p=[.3, .7])
2984 0.25412184
2986 This broadcasting also works for ``cov``, where the output objects are
2987 square matrices of size ``p.shape[-1]``. For example:
2989 >>> multinomial.cov([4, 5], [[.3, .7], [.4, .6]])
2990 array([[[ 0.84, -0.84],
2991 [-0.84, 0.84]],
2992 [[ 1.2 , -1.2 ],
2993 [-1.2 , 1.2 ]]])
2995 In this example, ``n.shape == (2,)`` and ``p.shape == (2, 2)``, and
2996 following the rules above, these broadcast as if ``p.shape == (2,)``.
2997 Thus the result should also be of shape ``(2,)``, but since each output is
2998 a :math:`2 \times 2` matrix, the result in fact has shape ``(2, 2, 2)``,
2999 where ``result[0]`` is equal to ``multinomial.cov(n=4, p=[.3, .7])`` and
3000 ``result[1]`` is equal to ``multinomial.cov(n=5, p=[.4, .6])``.
3002 See also
3003 --------
3004 scipy.stats.binom : The binomial distribution.
3005 numpy.random.Generator.multinomial : Sampling from the multinomial distribution.
3006 """ # noqa: E501
3008 def __init__(self, seed=None):
3009 super(multinomial_gen, self).__init__(seed)
3010 self.__doc__ = \
3011 doccer.docformat(self.__doc__, multinomial_docdict_params)
3013 def __call__(self, n, p, seed=None):
3014 """
3015 Create a frozen multinomial distribution.
3017 See `multinomial_frozen` for more information.
3018 """
3019 return multinomial_frozen(n, p, seed)
3021 def _process_parameters(self, n, p):
3022 """
3023 Return: n_, p_, npcond.
3025 n_ and p_ are arrays of the correct shape; npcond is a boolean array
3026 flagging values out of the domain.
3027 """
3028 p = np.array(p, dtype=np.float64, copy=True)
3029 p[..., -1] = 1. - p[..., :-1].sum(axis=-1)
3031 # true for bad p
3032 pcond = np.any(p < 0, axis=-1)
3033 pcond |= np.any(p > 1, axis=-1)
3035 n = np.array(n, dtype=np.int, copy=True)
3037 # true for bad n
3038 ncond = n <= 0
3040 return n, p, ncond | pcond
3042 def _process_quantiles(self, x, n, p):
3043 """
3044 Return: x_, xcond.
3046 x_ is an int array; xcond is a boolean array flagging values out of the
3047 domain.
3048 """
3049 xx = np.asarray(x, dtype=np.int)
3051 if xx.ndim == 0:
3052 raise ValueError("x must be an array.")
3054 if xx.size != 0 and not xx.shape[-1] == p.shape[-1]:
3055 raise ValueError("Size of each quantile should be size of p: "
3056 "received %d, but expected %d." %
3057 (xx.shape[-1], p.shape[-1]))
3059 # true for x out of the domain
3060 cond = np.any(xx != x, axis=-1)
3061 cond |= np.any(xx < 0, axis=-1)
3062 cond = cond | (np.sum(xx, axis=-1) != n)
3064 return xx, cond
3066 def _checkresult(self, result, cond, bad_value):
3067 result = np.asarray(result)
3069 if cond.ndim != 0:
3070 result[cond] = bad_value
3071 elif cond:
3072 if result.ndim == 0:
3073 return bad_value
3074 result[...] = bad_value
3075 return result
3077 def _logpmf(self, x, n, p):
3078 return gammaln(n+1) + np.sum(xlogy(x, p) - gammaln(x+1), axis=-1)
3080 def logpmf(self, x, n, p):
3081 """
3082 Log of the Multinomial probability mass function.
3084 Parameters
3085 ----------
3086 x : array_like
3087 Quantiles, with the last axis of `x` denoting the components.
3088 %(_doc_default_callparams)s
3090 Returns
3091 -------
3092 logpmf : ndarray or scalar
3093 Log of the probability mass function evaluated at `x`
3095 Notes
3096 -----
3097 %(_doc_callparams_note)s
3098 """
3099 n, p, npcond = self._process_parameters(n, p)
3100 x, xcond = self._process_quantiles(x, n, p)
3102 result = self._logpmf(x, n, p)
3104 # replace values for which x was out of the domain; broadcast
3105 # xcond to the right shape
3106 xcond_ = xcond | np.zeros(npcond.shape, dtype=np.bool_)
3107 result = self._checkresult(result, xcond_, np.NINF)
3109 # replace values bad for n or p; broadcast npcond to the right shape
3110 npcond_ = npcond | np.zeros(xcond.shape, dtype=np.bool_)
3111 return self._checkresult(result, npcond_, np.NAN)
3113 def pmf(self, x, n, p):
3114 """
3115 Multinomial probability mass function.
3117 Parameters
3118 ----------
3119 x : array_like
3120 Quantiles, with the last axis of `x` denoting the components.
3121 %(_doc_default_callparams)s
3123 Returns
3124 -------
3125 pmf : ndarray or scalar
3126 Probability mass function evaluated at `x`
3128 Notes
3129 -----
3130 %(_doc_callparams_note)s
3131 """
3132 return np.exp(self.logpmf(x, n, p))
3134 def mean(self, n, p):
3135 """
3136 Mean of the Multinomial distribution
3138 Parameters
3139 ----------
3140 %(_doc_default_callparams)s
3142 Returns
3143 -------
3144 mean : float
3145 The mean of the distribution
3146 """
3147 n, p, npcond = self._process_parameters(n, p)
3148 result = n[..., np.newaxis]*p
3149 return self._checkresult(result, npcond, np.NAN)
3151 def cov(self, n, p):
3152 """
3153 Covariance matrix of the multinomial distribution.
3155 Parameters
3156 ----------
3157 %(_doc_default_callparams)s
3159 Returns
3160 -------
3161 cov : ndarray
3162 The covariance matrix of the distribution
3163 """
3164 n, p, npcond = self._process_parameters(n, p)
3166 nn = n[..., np.newaxis, np.newaxis]
3167 result = nn * np.einsum('...j,...k->...jk', -p, p)
3169 # change the diagonal
3170 for i in range(p.shape[-1]):
3171 result[..., i, i] += n*p[..., i]
3173 return self._checkresult(result, npcond, np.nan)
3175 def entropy(self, n, p):
3176 r"""
3177 Compute the entropy of the multinomial distribution.
3179 The entropy is computed using this expression:
3181 .. math::
3183 f(x) = - \log n! - n\sum_{i=1}^k p_i \log p_i +
3184 \sum_{i=1}^k \sum_{x=0}^n \binom n x p_i^x(1-p_i)^{n-x} \log x!
3186 Parameters
3187 ----------
3188 %(_doc_default_callparams)s
3190 Returns
3191 -------
3192 h : scalar
3193 Entropy of the Multinomial distribution
3195 Notes
3196 -----
3197 %(_doc_callparams_note)s
3198 """
3199 n, p, npcond = self._process_parameters(n, p)
3201 x = np.r_[1:np.max(n)+1]
3203 term1 = n*np.sum(entr(p), axis=-1)
3204 term1 -= gammaln(n+1)
3206 n = n[..., np.newaxis]
3207 new_axes_needed = max(p.ndim, n.ndim) - x.ndim + 1
3208 x.shape += (1,)*new_axes_needed
3210 term2 = np.sum(binom.pmf(x, n, p)*gammaln(x+1),
3211 axis=(-1, -1-new_axes_needed))
3213 return self._checkresult(term1 + term2, npcond, np.nan)
3215 def rvs(self, n, p, size=None, random_state=None):
3216 """
3217 Draw random samples from a Multinomial distribution.
3219 Parameters
3220 ----------
3221 %(_doc_default_callparams)s
3222 size : integer or iterable of integers, optional
3223 Number of samples to draw (default 1).
3224 %(_doc_random_state)s
3226 Returns
3227 -------
3228 rvs : ndarray or scalar
3229 Random variates of shape (`size`, `len(p)`)
3231 Notes
3232 -----
3233 %(_doc_callparams_note)s
3234 """
3235 n, p, npcond = self._process_parameters(n, p)
3236 random_state = self._get_random_state(random_state)
3237 return random_state.multinomial(n, p, size)
3240multinomial = multinomial_gen()
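# Editorial example (a minimal sketch, not part of the original module): the
# `multinomial` instance above, including the broadcasting behaviour the class
# docstring describes; the counts and probabilities are illustrative only.
def _example_multinomial_usage():
    p = [0.3, 0.2, 0.5]
    rv = multinomial(8, p)                          # frozen distribution
    single = rv.pmf([1, 3, 4])                      # scalar probability
    batch = multinomial.pmf([[3, 4], [3, 5]], n=[7, 8], p=[.3, .7])
    cov = multinomial.cov(n=8, p=p)                 # 3 x 3 covariance matrix
    assert np.allclose(cov.sum(), 0.0)              # rows and columns sum to 0
    return single, batch, cov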
3243class multinomial_frozen(multi_rv_frozen):
3244 r"""
3245 Create a frozen Multinomial distribution.
3247 Parameters
3248 ----------
3249 n : int
3250 number of trials
3251 p: array_like
3252 probability of a trial falling into each category; should sum to 1
3253 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional
3254 This parameter defines the object to use for drawing random variates.
3255 If `seed` is `None` the `~np.random.RandomState` singleton is used.
3256 If `seed` is an int, a new ``RandomState`` instance is used, seeded
3257 with seed.
3258 If `seed` is already a ``RandomState`` or ``Generator`` instance,
3259 then that object is used.
3260 Default is None.
3261 """
3262 def __init__(self, n, p, seed=None):
3263 self._dist = multinomial_gen(seed)
3264 self.n, self.p, self.npcond = self._dist._process_parameters(n, p)
3266 # monkey patch self._dist
3267 def _process_parameters(n, p):
3268 return self.n, self.p, self.npcond
3270 self._dist._process_parameters = _process_parameters
3272 def logpmf(self, x):
3273 return self._dist.logpmf(x, self.n, self.p)
3275 def pmf(self, x):
3276 return self._dist.pmf(x, self.n, self.p)
3278 def mean(self):
3279 return self._dist.mean(self.n, self.p)
3281 def cov(self):
3282 return self._dist.cov(self.n, self.p)
3284 def entropy(self):
3285 return self._dist.entropy(self.n, self.p)
3287 def rvs(self, size=1, random_state=None):
3288 return self._dist.rvs(self.n, self.p, size, random_state)
3291# Set frozen generator docstrings from corresponding docstrings in
3292# multinomial and fill in default strings in class docstrings
3293for name in ['logpmf', 'pmf', 'mean', 'cov', 'rvs']:
3294 method = multinomial_gen.__dict__[name]
3295 method_frozen = multinomial_frozen.__dict__[name]
3296 method_frozen.__doc__ = doccer.docformat(
3297 method.__doc__, multinomial_docdict_noparams)
3298 method.__doc__ = doccer.docformat(method.__doc__,
3299 multinomial_docdict_params)
3302class special_ortho_group_gen(multi_rv_generic):
3303 r"""
3304 A matrix-valued SO(N) random variable.
3306 Return a random rotation matrix, drawn from the Haar distribution
3307 (the only uniform distribution on SO(n)).
3309 The `dim` keyword specifies the dimension N.
3311 Methods
3312 -------
3313 ``rvs(dim=None, size=1, random_state=None)``
3314 Draw random samples from SO(N).
3316 Parameters
3317 ----------
3318 dim : scalar
3319 Dimension of matrices
3321 Notes
3322 -----
3323 This class wraps the random_rot code from the MDP Toolkit,
3324 https://github.com/mdp-toolkit/mdp-toolkit
3326 Return a random rotation matrix, drawn from the Haar distribution
3327 (the only uniform distribution on SO(n)).
3328 The algorithm is described in the paper
3329 Stewart, G.W., "The efficient generation of random orthogonal
3330 matrices with an application to condition estimators", SIAM Journal
3331 on Numerical Analysis, 17(3), pp. 403-409, 1980.
3332 For more information see
3333 https://en.wikipedia.org/wiki/Orthogonal_matrix#Randomization
3335 See also the similar `ortho_group`.
3337 Examples
3338 --------
3339 >>> from scipy.stats import special_ortho_group
3340 >>> x = special_ortho_group.rvs(3)
3342 >>> np.dot(x, x.T)
3343 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
3344 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
3345 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
3347 >>> import scipy.linalg
3348 >>> scipy.linalg.det(x)
3349 1.0
3351 This generates one random matrix from SO(3). It is orthogonal and
3352 has a determinant of 1.
3354 """
3356 def __init__(self, seed=None):
3357 super(special_ortho_group_gen, self).__init__(seed)
3358 self.__doc__ = doccer.docformat(self.__doc__)
3360 def __call__(self, dim=None, seed=None):
3361 """
3362 Create a frozen SO(N) distribution.
3364 See `special_ortho_group_frozen` for more information.
3366 """
3367 return special_ortho_group_frozen(dim, seed=seed)
3369 def _process_parameters(self, dim):
3370 """
3371 Dimension N must be specified; it cannot be inferred.
3372 """
3374 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):
3375 raise ValueError("Dimension of rotation must be specified, "
3376 "and must be a scalar greater than 1.")
3378 return dim
3380 def rvs(self, dim, size=1, random_state=None):
3381 """
3382 Draw random samples from SO(N).
3384 Parameters
3385 ----------
3386 dim : integer
3387 Dimension of rotation space (N).
3388 size : integer, optional
3389 Number of samples to draw (default 1).
3391 Returns
3392 -------
3393 rvs : ndarray
3394 Random rotation matrices; shape (size, dim, dim), or (dim, dim) if size=1.
3396 """
3397 random_state = self._get_random_state(random_state)
3399 size = int(size)
3400 if size > 1:
3401 return np.array([self.rvs(dim, size=1, random_state=random_state)
3402 for i in range(size)])
3404 dim = self._process_parameters(dim)
3406 H = np.eye(dim)
3407 D = np.empty((dim,))
3408 for n in range(dim-1):
3409 x = random_state.normal(size=(dim-n,))
3410 norm2 = np.dot(x, x)
3411 x0 = x[0].item()
3412 D[n] = np.sign(x[0]) if x[0] != 0 else 1
3413 x[0] += D[n]*np.sqrt(norm2)
3414 x /= np.sqrt((norm2 - x0**2 + x[0]**2) / 2.)
3415 # Householder transformation
3416 H[:, n:] -= np.outer(np.dot(H[:, n:], x), x)
3417 D[-1] = (-1)**(dim-1)*D[:-1].prod()
3418 # Equivalent to np.dot(np.diag(D), H) but faster, apparently
3419 H = (D*H.T).T
3420 return H
3423special_ortho_group = special_ortho_group_gen()
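# Editorial example (a minimal sketch, not part of the original module): a
# draw from SO(3) via the `special_ortho_group` instance above, checked for
# the two defining properties (orthogonality, determinant +1).
def _example_special_ortho_group_usage():
    R = special_ortho_group.rvs(3, random_state=0)
    assert np.allclose(np.dot(R, R.T), np.eye(3))   # orthogonal
    assert np.isclose(scipy.linalg.det(R), 1.0)     # proper rotation
    return R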
3426class special_ortho_group_frozen(multi_rv_frozen):
3427 def __init__(self, dim=None, seed=None):
3428 """
3429 Create a frozen SO(N) distribution.
3431 Parameters
3432 ----------
3433 dim : scalar
3434 Dimension of matrices
3435 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional
3436 This parameter defines the object to use for drawing random
3437 variates.
3438 If `seed` is `None` the `~np.random.RandomState` singleton is used.
3439 If `seed` is an int, a new ``RandomState`` instance is used, seeded
3440 with seed.
3441 If `seed` is already a ``RandomState`` or ``Generator`` instance,
3442 then that object is used.
3443 Default is None.
3445 Examples
3446 --------
3447 >>> from scipy.stats import special_ortho_group
3448 >>> g = special_ortho_group(5)
3449 >>> x = g.rvs()
3451 """
3452 self._dist = special_ortho_group_gen(seed)
3453 self.dim = self._dist._process_parameters(dim)
3455 def rvs(self, size=1, random_state=None):
3456 return self._dist.rvs(self.dim, size, random_state)
3459class ortho_group_gen(multi_rv_generic):
3460 r"""
3461 A matrix-valued O(N) random variable.
3463 Return a random orthogonal matrix, drawn from the O(N) Haar
3464 distribution (the only uniform distribution on O(N)).
3466 The `dim` keyword specifies the dimension N.
3468 Methods
3469 -------
3470 ``rvs(dim=None, size=1, random_state=None)``
3471 Draw random samples from O(N).
3473 Parameters
3474 ----------
3475 dim : scalar
3476 Dimension of matrices
3478 Notes
3479 -----
3480 This class is closely related to `special_ortho_group`.
3482 Some care is taken to avoid numerical error, as per the paper by Mezzadri.
3484 References
3485 ----------
3486 .. [1] F. Mezzadri, "How to generate random matrices from the classical
3487 compact groups", :arXiv:`math-ph/0609050v2`.
3489 Examples
3490 --------
3491 >>> from scipy.stats import ortho_group
3492 >>> x = ortho_group.rvs(3)
3494 >>> np.dot(x, x.T)
3495 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
3496 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
3497 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
3499 >>> import scipy.linalg
3500 >>> np.fabs(scipy.linalg.det(x))
3501 1.0
3503 This generates one random matrix from O(3). It is orthogonal and
3504 has a determinant of +1 or -1.
3506 """
3508 def __init__(self, seed=None):
3509 super(ortho_group_gen, self).__init__(seed)
3510 self.__doc__ = doccer.docformat(self.__doc__)
3512 def _process_parameters(self, dim):
3513 """
3514 Dimension N must be specified; it cannot be inferred.
3515 """
3517 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):
3518 raise ValueError("Dimension of rotation must be specified,"
3519 "and must be a scalar greater than 1.")
3521 return dim
3523 def rvs(self, dim, size=1, random_state=None):
3524 """
3525 Draw random samples from O(N).
3527 Parameters
3528 ----------
3529 dim : integer
3530 Dimension of rotation space (N).
3531 size : integer, optional
3532 Number of samples to draw (default 1).
3534 Returns
3535 -------
3536 rvs : ndarray
3537 Random orthogonal matrices; shape (size, dim, dim), or (dim, dim) if size=1.
3539 """
3540 random_state = self._get_random_state(random_state)
3542 size = int(size)
3543 if size > 1:
3544 return np.array([self.rvs(dim, size=1, random_state=random_state)
3545 for i in range(size)])
3547 dim = self._process_parameters(dim)
3549 H = np.eye(dim)
3550 for n in range(dim):
3551 x = random_state.normal(size=(dim-n,))
3552 norm2 = np.dot(x, x)
3553 x0 = x[0].item()
3554 # random sign, 50/50, but chosen carefully to avoid roundoff error
3555 D = np.sign(x[0]) if x[0] != 0 else 1
3556 x[0] += D * np.sqrt(norm2)
3557 x /= np.sqrt((norm2 - x0**2 + x[0]**2) / 2.)
3558 # Householder transformation
3559 H[:, n:] = -D * (H[:, n:] - np.outer(np.dot(H[:, n:], x), x))
3560 return H
3563ortho_group = ortho_group_gen()
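# Editorial example (a minimal sketch, not part of the original module): a
# draw from O(3) via the `ortho_group` instance above; the determinant may be
# +1 or -1.
def _example_ortho_group_usage():
    Q = ortho_group.rvs(3, random_state=1)
    assert np.allclose(np.dot(Q, Q.T), np.eye(3))        # orthogonal
    assert np.isclose(abs(scipy.linalg.det(Q)), 1.0)     # |det| == 1
    return Q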
3566class random_correlation_gen(multi_rv_generic):
3567 r"""
3568 A random correlation matrix.
3570 Return a random correlation matrix, given a vector of eigenvalues.
3572 The `eigs` keyword specifies the eigenvalues of the correlation matrix,
3573 and implies the dimension.
3575 Methods
3576 -------
3577 ``rvs(eigs=None, random_state=None)``
3578 Draw random correlation matrices, all with eigenvalues eigs.
3580 Parameters
3581 ----------
3582 eigs : 1d ndarray
3583 Eigenvalues of correlation matrix.
3585 Notes
3586 -----
3588 Generates a random correlation matrix following a numerically stable
3589 algorithm spelled out by Davies & Higham. This algorithm uses a single O(N)
3590 similarity transformation to construct a symmetric positive semi-definite
3591 matrix, and applies a series of Givens rotations to scale it to have ones
3592 on the diagonal.
3594 References
3595 ----------
3597 .. [1] Davies, Philip I; Higham, Nicholas J; "Numerically stable generation
3598 of correlation matrices and their factors", BIT 2000, Vol. 40,
3599 No. 4, pp. 640-651.
3601 Examples
3602 --------
3603 >>> from scipy.stats import random_correlation
3604 >>> np.random.seed(514)
3605 >>> x = random_correlation.rvs((.5, .8, 1.2, 1.5))
3606 >>> x
3607 array([[ 1. , -0.20387311, 0.18366501, -0.04953711],
3608 [-0.20387311, 1. , -0.24351129, 0.06703474],
3609 [ 0.18366501, -0.24351129, 1. , 0.38530195],
3610 [-0.04953711, 0.06703474, 0.38530195, 1. ]])
3611 >>> import scipy.linalg
3612 >>> e, v = scipy.linalg.eigh(x)
3613 >>> e
3614 array([ 0.5, 0.8, 1.2, 1.5])
3616 """
3618 def __init__(self, seed=None):
3619 super(random_correlation_gen, self).__init__(seed)
3620 self.__doc__ = doccer.docformat(self.__doc__)
3622 def _process_parameters(self, eigs, tol):
3623 eigs = np.asarray(eigs, dtype=float)
3624 dim = eigs.size
3626 if eigs.ndim != 1 or eigs.shape[0] != dim or dim <= 1:
3627 raise ValueError("Array 'eigs' must be a vector of length "
3628 "greater than 1.")
3630 if np.fabs(np.sum(eigs) - dim) > tol:
3631 raise ValueError("Sum of eigenvalues must equal dimensionality.")
3633 for x in eigs:
3634 if x < -tol:
3635 raise ValueError("All eigenvalues must be non-negative.")
3637 return dim, eigs
3639 def _givens_to_1(self, aii, ajj, aij):
3640 """Computes a 2x2 Givens matrix to put 1's on the diagonal.
3642 The input matrix is a 2x2 symmetric matrix M = [ aii aij ; aij ajj ].
3644 The output matrix g is a 2x2 rotation (Givens) matrix of the form
3645 [ c s ; -s c ]; the elements c and s are returned.
3647 Applying the output matrix to the input matrix (as b=g.T M g)
3648 results in a matrix with bii=1, provided tr(M) - det(M) >= 1
3649 and floating point issues do not occur. Otherwise, some other
3650 valid rotation is returned. When tr(M)==2, also bjj=1.
3652 """
3653 aiid = aii - 1.
3654 ajjd = ajj - 1.
3656 if ajjd == 0:
3657 # ajj==1, so swap aii and ajj to avoid division by zero
3658 return 0., 1.
3660 dd = math.sqrt(max(aij**2 - aiid*ajjd, 0))
3662 # t is chosen to avoid cancellation [1]
3663 t = (aij + math.copysign(dd, aij)) / ajjd
3664 c = 1. / math.sqrt(1. + t*t)
3665 if c == 0:
3666 # Underflow
3667 s = 1.0
3668 else:
3669 s = c*t
3670 return c, s
3672 def _to_corr(self, m):
3673 """
3674 Given a psd matrix m, rotate to put ones on the diagonal, turning it
3675 into a correlation matrix. This also requires the trace to equal the
3676 dimensionality. Note: modifies the input matrix in-place.
3677 """
3678 # Check requirements for in-place Givens
3679 if not (m.flags.c_contiguous and m.dtype == np.float64 and
3680 m.shape[0] == m.shape[1]):
3681 raise ValueError("Matrix must be a square, C-contiguous float64 array.")
3683 d = m.shape[0]
3684 for i in range(d-1):
3685 if m[i, i] == 1:
3686 continue
3687 elif m[i, i] > 1:
3688 for j in range(i+1, d):
3689 if m[j, j] < 1:
3690 break
3691 else:
3692 for j in range(i+1, d):
3693 if m[j, j] > 1:
3694 break
3696 c, s = self._givens_to_1(m[i, i], m[j, j], m[i, j])
3698 # Use BLAS to apply Givens rotations in-place. Equivalent to:
3699 # g = np.eye(d)
3700 # g[i, i] = g[j,j] = c
3701 # g[j, i] = -s; g[i, j] = s
3702 # m = np.dot(g.T, np.dot(m, g))
3703 mv = m.ravel()
3704 drot(mv, mv, c, -s, n=d,
3705 offx=i*d, incx=1, offy=j*d, incy=1,
3706 overwrite_x=True, overwrite_y=True)
3707 drot(mv, mv, c, -s, n=d,
3708 offx=i, incx=d, offy=j, incy=d,
3709 overwrite_x=True, overwrite_y=True)
3711 return m
3713 def rvs(self, eigs, random_state=None, tol=1e-13, diag_tol=1e-7):
3714 """
3715 Draw random correlation matrices
3717 Parameters
3718 ----------
3719 eigs : 1d ndarray
3720 Eigenvalues of correlation matrix
3721 tol : float, optional
3722 Tolerance for input parameter checks
3723 diag_tol : float, optional
3724 Tolerance for deviation of the diagonal of the resulting
3725 matrix. Default: 1e-7
3727 Raises
3728 ------
3729 RuntimeError
3730 Floating point error prevented generating a valid correlation
3731 matrix.
3733 Returns
3734 -------
3735 rvs : ndarray
3736 Random correlation matrix of shape (dim, dim), with eigenvalues
3737 equal to eigs.
3739 """
3740 dim, eigs = self._process_parameters(eigs, tol=tol)
3742 random_state = self._get_random_state(random_state)
3744 m = ortho_group.rvs(dim, random_state=random_state)
3745 m = np.dot(np.dot(m, np.diag(eigs)), m.T) # Set the trace of m
3746 m = self._to_corr(m) # Carefully rotate to unit diagonal
3748 # Check diagonal
3749 if abs(m.diagonal() - 1).max() > diag_tol:
3750 raise RuntimeError("Failed to generate a valid correlation matrix")
3752 return m
3755random_correlation = random_correlation_gen()
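# Editorial example (a minimal sketch, not part of the original module): one
# random correlation matrix with prescribed eigenvalues via the
# `random_correlation` instance above; the eigenvalues (which must sum to the
# dimension) are illustrative only.
def _example_random_correlation_usage():
    eigs = (.5, .8, 1.2, 1.5)                      # sum equals the dimension 4
    c = random_correlation.rvs(eigs, random_state=np.random.RandomState(514))
    assert np.allclose(np.diag(c), 1.0)            # unit diagonal
    assert np.allclose(np.sort(scipy.linalg.eigvalsh(c)), np.sort(eigs))
    return c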
3758class unitary_group_gen(multi_rv_generic):
3759 r"""
3760 A matrix-valued U(N) random variable.
3762 Return a random unitary matrix.
3764 The `dim` keyword specifies the dimension N.
3766 Methods
3767 -------
3768 ``rvs(dim=None, size=1, random_state=None)``
3769 Draw random samples from U(N).
3771 Parameters
3772 ----------
3773 dim : scalar
3774 Dimension of matrices
3776 Notes
3777 -----
3778 This class is similar to `ortho_group`.
3780 References
3781 ----------
3782 .. [1] F. Mezzadri, "How to generate random matrices from the classical
3783 compact groups", arXiv:math-ph/0609050v2.
3785 Examples
3786 --------
3787 >>> from scipy.stats import unitary_group
3788 >>> x = unitary_group.rvs(3)
3790 >>> np.dot(x, x.conj().T)
3791 array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
3792 [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
3793 [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
3795 This generates one random matrix from U(3). The dot product confirms that
3796 it is unitary up to machine precision.
3798 """
3800 def __init__(self, seed=None):
3801 super(unitary_group_gen, self).__init__(seed)
3802 self.__doc__ = doccer.docformat(self.__doc__)
3804 def _process_parameters(self, dim):
3805 """
3806 Dimension N must be specified; it cannot be inferred.
3807 """
3809 if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):
3810 raise ValueError("Dimension of rotation must be specified,"
3811 "and must be a scalar greater than 1.")
3813 return dim
3815 def rvs(self, dim, size=1, random_state=None):
3816 """
3817 Draw random samples from U(N).
3819 Parameters
3820 ----------
3821 dim : integer
3822 Dimension of space (N).
3823 size : integer, optional
3824 Number of samples to draw (default 1).
3826 Returns
3827 -------
3828 rvs : ndarray
3829 Random unitary matrices; shape (size, dim, dim), or (dim, dim) if size=1.
3831 """
3832 random_state = self._get_random_state(random_state)
3834 size = int(size)
3835 if size > 1:
3836 return np.array([self.rvs(dim, size=1, random_state=random_state)
3837 for i in range(size)])
3839 dim = self._process_parameters(dim)
3841 z = 1/math.sqrt(2)*(random_state.normal(size=(dim, dim)) +
3842 1j*random_state.normal(size=(dim, dim)))
3843 q, r = scipy.linalg.qr(z)
3844 d = r.diagonal()
3845 q *= d/abs(d)
3846 return q
3849unitary_group = unitary_group_gen()
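# Editorial example (a minimal sketch, not part of the original module): a
# draw from U(3) via the `unitary_group` instance above, checked for
# unitarity.
def _example_unitary_group_usage():
    U = unitary_group.rvs(3, random_state=2)
    assert np.allclose(np.dot(U, U.conj().T), np.eye(3))  # unitary
    return U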