Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/numpy/lib/nanfunctions.py : 22%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Functions that ignore NaN.
4Functions
5---------
7- `nanmin` -- minimum non-NaN value
8- `nanmax` -- maximum non-NaN value
9- `nanargmin` -- index of minimum non-NaN value
10- `nanargmax` -- index of maximum non-NaN value
11- `nansum` -- sum of non-NaN values
12- `nanprod` -- product of non-NaN values
13- `nancumsum` -- cumulative sum of non-NaN values
14- `nancumprod` -- cumulative product of non-NaN values
15- `nanmean` -- mean of non-NaN values
16- `nanvar` -- variance of non-NaN values
17- `nanstd` -- standard deviation of non-NaN values
18- `nanmedian` -- median of non-NaN values
19- `nanquantile` -- qth quantile of non-NaN values
20- `nanpercentile` -- qth percentile of non-NaN values
22"""
23import functools
24import warnings
25import numpy as np
26from numpy.lib import function_base
27from numpy.core import overrides
# Dispatch decorator used by every public function in this module; binding
# ``module='numpy'`` once here keeps the individual decorations short.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch,
    module='numpy',
)


# Names exported by ``from numpy.lib.nanfunctions import *``.
__all__ = [
    'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
    'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod',
    'nancumsum', 'nancumprod', 'nanquantile',
]
def _nan_mask(a, out=None):
    """
    Build a mask that is True wherever `a` is *not* NaN.

    Parameters
    ----------
    a : ndarray
        Input array. It is used as-is (no conversion), so it must expose
        ``dtype``.
    out : ndarray, optional
        Array that receives the mask. Defaults to ``None``, in which case a
        new array is allocated by ``np.isnan``.

    Returns
    -------
    y : bool ndarray or True
        ``False`` at NaN positions and ``True`` elsewhere. When the dtype
        kind of `a` is neither float (``'f'``) nor complex (``'c'``) the
        literal ``True`` is returned instead of an array.
    """
    # `a` is trusted to be an array already; this is a private helper.
    may_hold_nan = a.dtype.kind in 'fc'
    if not may_hold_nan:
        return True

    # Compute isnan, then flip it in place to avoid a second allocation.
    not_nan = np.isnan(a, out=out)
    np.invert(not_nan, out=not_nan)
    return not_nan
def _replace_nan(a, val):
    """
    Return a NaN-free copy of `a` together with the mask of replaced slots.

    For object and inexact dtypes a copy of `a` is made and every NaN in it
    is overwritten with `val`. For any other dtype `a` is returned
    unchanged (after ``np.asanyarray``) with a mask of ``None``.

    Scalars come back as arrays/array scalars, which matters when the result
    is later used as the ``out`` argument of another operation.

    Parameters
    ----------
    a : array-like
        Input array.
    val : float
        Value written in place of each NaN.

    Returns
    -------
    y : ndarray
        Copy of `a` with NaNs replaced by `val`, or `a` itself when its
        dtype is neither object nor inexact.
    mask : {bool ndarray, None}
        Boolean mask that is True where NaNs were found, or ``None`` when
        no replacement was attempted.
    """
    arr = np.asanyarray(a)

    if arr.dtype == np.object_:
        # object arrays do not support `isnan` (gh-9009); rely on NaN being
        # the value that compares unequal to itself.
        nan_mask = np.not_equal(arr, arr, dtype=bool)
    elif issubclass(arr.dtype.type, np.inexact):
        nan_mask = np.isnan(arr)
    else:
        # Nothing to replace for this dtype.
        return arr, None

    filled = np.array(arr, subok=True, copy=True)
    np.copyto(filled, val, where=nan_mask)
    return filled, nan_mask
def _copyto(a, val, mask):
    """
    Write `val` into `a` wherever `mask` is True.

    Unlike ``np.copyto`` this also accepts a numpy scalar for `a`; in that
    case a new scalar of the same type holding `val` is returned.

    Parameters
    ----------
    a : ndarray or numpy scalar
        Array (modified in place) or numpy scalar whose values are to be
        replaced by `val`.
    val : numpy scalar
        Replacement value.
    mask : ndarray, scalar
        Boolean selector, broadcast against `a`. Only consulted when `a`
        is an ndarray.

    Returns
    -------
    res : ndarray, scalar
        `a` with the selected elements replaced, or `val` converted to the
        scalar type of `a`.
    """
    if not isinstance(a, np.ndarray):
        # Scalars cannot be updated in place: build a fresh one instead.
        return a.dtype.type(val)

    np.copyto(a, val, where=mask, casting='unsafe')
    return a
def _remove_nan_1d(arr1d, overwrite_input=False):
    """
    Drop the NaNs from a 1-d array, without preserving element order.

    Gives the same set of values as ``arr1d[~np.isnan(arr1d)]`` but possibly
    in a different order: NaNs near the front are overwritten with non-NaN
    values taken from the tail, and the tail is then cut off.

    Parameters
    ----------
    arr1d : ndarray
        Array to remove NaNs from.
    overwrite_input : bool
        True if `arr1d` may be modified in place.

    Returns
    -------
    res : ndarray
        Array with the NaN elements removed.
    overwrite_input : bool
        True if `res` may be modified in place, given the constraint on the
        input.

    Warns
    -----
    RuntimeWarning
        If every element of `arr1d` is NaN (an empty array also takes this
        path); an empty slice of `arr1d` is returned.
    """
    is_nan = np.isnan(arr1d)
    nan_idx = np.nonzero(is_nan)[0]
    n_nan = nan_idx.size

    if n_nan == arr1d.size:
        warnings.warn("All-NaN slice encountered", RuntimeWarning,
                      stacklevel=5)
        return arr1d[:0], True

    if n_nan == 0:
        # Nothing to strip: hand the input back untouched.
        return arr1d, overwrite_input

    work = arr1d if overwrite_input else arr1d.copy()
    # Non-NaN values sitting in the last `n_nan` slots ...
    tail_values = work[-n_nan:][~is_nan[-n_nan:]]
    # ... are moved into the earliest NaN slots, so the tail can be dropped.
    work[nan_idx[:tail_values.size]] = tail_values
    return work[:-n_nan], True
def _divide_by_count(a, b, out=None):
    """
    Compute ``a / b`` with invalid-value and divide-by-zero warnings muted.

    When `out` is None and `a` is an ndarray, the division is carried out in
    place in `a`. When `out` is None and `a` is a numpy scalar, the quotient
    is converted back to the scalar type of `a`. Callers are expected to
    pass an inexact `a`; this is not verified.

    Parameters
    ----------
    a : {ndarray, numpy scalar}
        Numerator. Expected to be of inexact type but not checked.
    b : {ndarray, numpy scalar}
        Denominator.
    out : ndarray, optional
        Array that receives the result. Defaults to ``None``. Its shape must
        match the expected output; values are cast with ``casting='unsafe'``.

    Returns
    -------
    ret : {ndarray, numpy scalar}
        The quotient ``a / b``: `out` if given, otherwise `a` (ndarray case)
        or a new scalar of the same type as `a`.
    """
    with np.errstate(invalid='ignore', divide='ignore'):
        if out is None:
            if not isinstance(a, np.ndarray):
                # Scalar numerator: keep its type in the result.
                return a.dtype.type(a / b)
            target = a
        else:
            # Also covers a numpy scalar written into a zero-dimensional
            # `out`, which np.divide currently permits.
            target = out
        return np.divide(a, b, out=target, casting='unsafe')
def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None):
    """Return the array-like arguments of `nanmin` used for dispatching."""
    return a, out
@array_function_dispatch(_nanmin_dispatcher)
def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
    """
    Minimum of an array, or minimum along an axis, with NaNs skipped.

    A slice that consists only of NaNs yields NaN for that slice and
    triggers a ``RuntimeWarning``.

    Parameters
    ----------
    a : array_like
        Data whose minimum is wanted. Non-array input is converted.
    axis : {int, tuple of int, None}, optional
        Axis or axes to reduce over. By default the flattened array is
        reduced.
    out : ndarray, optional
        Array that receives the result. Defaults to ``None``. Its shape must
        match the expected output; the type is cast if necessary. See
        `ufuncs-output-type` for more details.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If True, the reduced axes are kept in the result with size one, so
        that the result broadcasts correctly against the original `a`.

        Any value other than the default is passed through to the `min`
        method of sub-classes of `ndarray`. If the sub-class method does
        not implement `keepdims`, its exception propagates.

        .. versionadded:: 1.8.0

    Returns
    -------
    nanmin : ndarray
        An array with the same shape as `a`, with the specified axis
        removed. If `a` is a 0-d array, or if axis is None, an ndarray
        scalar is returned. The same dtype as `a` is returned.

    See Also
    --------
    nanmax :
        The maximum value of an array along a given axis, ignoring any NaNs.
    amin :
        The minimum value of an array along a given axis, propagating any
        NaNs.
    fmin :
        Element-wise minimum of two arrays, ignoring any NaNs.
    minimum :
        Element-wise minimum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amax, fmax, maximum

    Notes
    -----
    NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754). This means that Not a Number is not equivalent to infinity.
    Positive infinity is treated as a very large number and negative
    infinity is treated as a very small (i.e. negative) number.

    If the input has an integer type the function is equivalent to np.min.

    Examples
    --------
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmin(a)
    1.0
    >>> np.nanmin(a, axis=0)
    array([1., 2.])
    >>> np.nanmin(a, axis=1)
    array([1., 3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmin([1, 2, np.nan, np.inf])
    1.0
    >>> np.nanmin([1, 2, np.nan, np.NINF])
    -inf

    """
    # Forward `keepdims` only when the caller actually supplied it.
    extra = {} if keepdims is np._NoValue else {'keepdims': keepdims}

    if type(a) is np.ndarray and a.dtype != np.object_:
        # Fast path. The exact-type check is deliberate: this is not safe
        # for subclasses of ndarray, or for object arrays, which do not
        # implement isnan (gh-9009) or fmin correctly (gh-8975).
        res = np.fmin.reduce(a, axis=axis, out=out, **extra)
        if np.isnan(res).any():
            warnings.warn("All-NaN slice encountered", RuntimeWarning,
                          stacklevel=3)
        return res

    # Slow path, safe for subclasses of ndarray: NaNs become +inf so they
    # can never win the minimum.
    filled, nan_mask = _replace_nan(a, +np.inf)
    res = np.amin(filled, axis=axis, out=out, **extra)
    if nan_mask is None:
        return res

    # Slices made up only of NaNs must report NaN rather than +inf.
    all_nan = np.all(nan_mask, axis=axis, **extra)
    if np.any(all_nan):
        res = _copyto(res, np.nan, all_nan)
        warnings.warn("All-NaN axis encountered", RuntimeWarning,
                      stacklevel=3)
    return res
def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None):
    """Return the array-like arguments of `nanmax` used for dispatching."""
    return a, out
@array_function_dispatch(_nanmax_dispatcher)
def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
    """
    Maximum of an array, or maximum along an axis, with NaNs skipped.

    A slice that consists only of NaNs yields NaN for that slice and
    triggers a ``RuntimeWarning``.

    Parameters
    ----------
    a : array_like
        Data whose maximum is wanted. Non-array input is converted.
    axis : {int, tuple of int, None}, optional
        Axis or axes to reduce over. By default the flattened array is
        reduced.
    out : ndarray, optional
        Array that receives the result. Defaults to ``None``. Its shape must
        match the expected output; the type is cast if necessary. See
        `ufuncs-output-type` for more details.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If True, the reduced axes are kept in the result with size one, so
        that the result broadcasts correctly against the original `a`.

        Any value other than the default is passed through to the `max`
        method of sub-classes of `ndarray`. If the sub-class method does
        not implement `keepdims`, its exception propagates.

        .. versionadded:: 1.8.0

    Returns
    -------
    nanmax : ndarray
        An array with the same shape as `a`, with the specified axis removed.
        If `a` is a 0-d array, or if axis is None, an ndarray scalar is
        returned. The same dtype as `a` is returned.

    See Also
    --------
    nanmin :
        The minimum value of an array along a given axis, ignoring any NaNs.
    amax :
        The maximum value of an array along a given axis, propagating any
        NaNs.
    fmax :
        Element-wise maximum of two arrays, ignoring any NaNs.
    maximum :
        Element-wise maximum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.

    amin, fmin, minimum

    Notes
    -----
    NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754). This means that Not a Number is not equivalent to infinity.
    Positive infinity is treated as a very large number and negative
    infinity is treated as a very small (i.e. negative) number.

    If the input has an integer type the function is equivalent to np.max.

    Examples
    --------
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmax(a)
    3.0
    >>> np.nanmax(a, axis=0)
    array([3., 2.])
    >>> np.nanmax(a, axis=1)
    array([2., 3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmax([1, 2, np.nan, np.NINF])
    2.0
    >>> np.nanmax([1, 2, np.nan, np.inf])
    inf

    """
    # Forward `keepdims` only when the caller actually supplied it.
    extra = {} if keepdims is np._NoValue else {'keepdims': keepdims}

    if type(a) is np.ndarray and a.dtype != np.object_:
        # Fast path. The exact-type check is deliberate: this is not safe
        # for subclasses of ndarray, or for object arrays, which do not
        # implement isnan (gh-9009) or fmax correctly (gh-8975).
        res = np.fmax.reduce(a, axis=axis, out=out, **extra)
        if np.isnan(res).any():
            warnings.warn("All-NaN slice encountered", RuntimeWarning,
                          stacklevel=3)
        return res

    # Slow path, safe for subclasses of ndarray: NaNs become -inf so they
    # can never win the maximum.
    filled, nan_mask = _replace_nan(a, -np.inf)
    res = np.amax(filled, axis=axis, out=out, **extra)
    if nan_mask is None:
        return res

    # Slices made up only of NaNs must report NaN rather than -inf.
    all_nan = np.all(nan_mask, axis=axis, **extra)
    if np.any(all_nan):
        res = _copyto(res, np.nan, all_nan)
        warnings.warn("All-NaN axis encountered", RuntimeWarning,
                      stacklevel=3)
    return res
def _nanargmin_dispatcher(a, axis=None):
    """Return the array-like arguments of `nanargmin` used for dispatching."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_nanargmin_dispatcher)
def nanargmin(a, axis=None):
    """
    Indices of the minimum values along an axis, with NaNs skipped.

    Warning: the results cannot be trusted if a slice contains only NaNs
    and Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. By default flattened input is used.

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If any slice along `axis` consists only of NaNs.

    See Also
    --------
    argmin, nanargmax

    Examples
    --------
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmin(a)
    0
    >>> np.nanargmin(a)
    2
    >>> np.nanargmin(a, axis=0)
    array([1, 1])
    >>> np.nanargmin(a, axis=1)
    array([1, 0])

    """
    # NaNs are replaced by +inf so that they lose against any other value.
    filled, nan_mask = _replace_nan(a, np.inf)
    index = np.argmin(filled, axis=axis)
    if nan_mask is not None and np.any(np.all(nan_mask, axis=axis)):
        raise ValueError("All-NaN slice encountered")
    return index
def _nanargmax_dispatcher(a, axis=None):
    """Return the array-like arguments of `nanargmax` used for dispatching."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_nanargmax_dispatcher)
def nanargmax(a, axis=None):
    """
    Indices of the maximum values along an axis, with NaNs skipped.

    Warning: the results cannot be trusted if a slice contains only NaNs
    and -Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. By default flattened input is used.

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    Raises
    ------
    ValueError
        If any slice along `axis` consists only of NaNs.

    See Also
    --------
    argmax, nanargmin

    Examples
    --------
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmax(a)
    0
    >>> np.nanargmax(a)
    1
    >>> np.nanargmax(a, axis=0)
    array([1, 0])
    >>> np.nanargmax(a, axis=1)
    array([1, 1])

    """
    # NaNs are replaced by -inf so that they lose against any other value.
    filled, nan_mask = _replace_nan(a, -np.inf)
    index = np.argmax(filled, axis=axis)
    if nan_mask is not None and np.any(np.all(nan_mask, axis=axis)):
        raise ValueError("All-NaN slice encountered")
    return index
def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
    """Return the array-like arguments of `nansum` used for dispatching."""
    return a, out
@array_function_dispatch(_nansum_dispatcher)
def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
    """
    Sum of array elements over a given axis, counting NaNs as zero.

    In NumPy versions <= 1.9.0 Nan is returned for slices that are all-NaN
    or empty. In later versions zero is returned.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose sum is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the sum is computed. The default is to
        compute the sum of the flattened array.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which the
        elements are summed. By default, the dtype of `a` is used. An
        exception is when `a` has an integer type with less precision than
        the platform (u)intp. In that case, the default will be either
        (u)int32 or (u)int64 depending on whether the platform is 32 or 64
        bits. For inexact inputs, dtype must be inexact.

        .. versionadded:: 1.8.0
    out : ndarray, optional
        Array that receives the result. Defaults to ``None``. Its shape must
        match the expected output; the type is cast if necessary. See
        `ufuncs-output-type` for more details. The casting of NaN to integer
        can yield unexpected results.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If True, the reduced axes are kept in the result with size one, so
        that the result broadcasts correctly against the original `a`.

        Any value other than the default is passed through to the `mean` or
        `sum` methods of sub-classes of `ndarray`. If the sub-class method
        does not implement `keepdims`, its exception propagates.

        .. versionadded:: 1.8.0

    Returns
    -------
    nansum : ndarray.
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned. The result has the same
        size as `a`, and the same shape as `a` if `axis` is not None
        or `a` is a 1-d array.

    See Also
    --------
    numpy.sum : Sum across array propagating NaNs.
    isnan : Show which elements are NaN.
    isfinite: Show which elements are not NaN or +/-inf.

    Notes
    -----
    If both positive and negative infinity are present, the sum will be Not
    A Number (NaN).

    Examples
    --------
    >>> np.nansum(1)
    1
    >>> np.nansum([1])
    1
    >>> np.nansum([1, np.nan])
    1.0
    >>> a = np.array([[1, 1], [1, np.nan]])
    >>> np.nansum(a)
    3.0
    >>> np.nansum(a, axis=0)
    array([2., 1.])
    >>> np.nansum([1, np.nan, np.inf])
    inf
    >>> np.nansum([1, np.nan, np.NINF])
    -inf
    >>> from numpy.testing import suppress_warnings
    >>> with suppress_warnings() as sup:
    ...     sup.filter(RuntimeWarning)
    ...     np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present
    nan

    """
    # Zero is the additive identity, so substituted NaNs do not contribute.
    filled, _ = _replace_nan(a, 0)
    return np.sum(filled, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
    """Return the array-like arguments of `nanprod` used for dispatching."""
    return a, out
@array_function_dispatch(_nanprod_dispatcher)
def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
    """
    Product of array elements over a given axis, counting NaNs as one.

    One is returned for slices that are all-NaN or empty.

    .. versionadded:: 1.10.0

    Parameters
    ----------
    a : array_like
        Array containing numbers whose product is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the product is computed. The default is to
        compute the product of the flattened array.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which the
        elements are multiplied. By default, the dtype of `a` is used. An
        exception is when `a` has an integer type with less precision than
        the platform (u)intp. In that case, the default will be either
        (u)int32 or (u)int64 depending on whether the platform is 32 or 64
        bits. For inexact inputs, dtype must be inexact.
    out : ndarray, optional
        Array that receives the result. Defaults to ``None``. Its shape must
        match the expected output; the type is cast if necessary. See
        `ufuncs-output-type` for more details. The casting of NaN to integer
        can yield unexpected results.
    keepdims : bool, optional
        If True, the reduced axes are kept in the result with size one, so
        that the result broadcasts correctly against the original `a`.

    Returns
    -------
    nanprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.prod : Product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nanprod(1)
    1
    >>> np.nanprod([1])
    1
    >>> np.nanprod([1, np.nan])
    1.0
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanprod(a)
    6.0
    >>> np.nanprod(a, axis=0)
    array([3., 2.])

    """
    # One is the multiplicative identity, so substituted NaNs do not
    # contribute.
    filled, _ = _replace_nan(a, 1)
    return np.prod(filled, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None):
    """Return the array-like arguments of `nancumsum` used for dispatching."""
    return a, out
@array_function_dispatch(_nancumsum_dispatcher)
def nancumsum(a, axis=None, dtype=None, out=None):
    """
    Cumulative sum of array elements over a given axis, counting NaNs as
    zero.

    The running sum is left unchanged by a NaN, and leading NaNs show up as
    zeros. Zeros are returned for slices that are all-NaN or empty.

    .. versionadded:: 1.12.0

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative sum is computed. The default
        (None) is to compute the cumsum over the flattened array.
    dtype : dtype, optional
        Type of the returned array and of the accumulator in which the
        elements are summed. If `dtype` is not specified, it defaults
        to the dtype of `a`, unless `a` has an integer dtype with a
        precision less than that of the default platform integer. In
        that case, the default platform integer is used.
    out : ndarray, optional
        Array that receives the result. It must have the same shape and
        buffer length as the expected output but the type will be cast if
        necessary. See `ufuncs-output-type` for more details.

    Returns
    -------
    nancumsum : ndarray.
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned. The result has the same
        size as `a`, and the same shape as `a` if `axis` is not None
        or `a` is a 1-d array.

    See Also
    --------
    numpy.cumsum : Cumulative sum across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nancumsum(1)
    array([1])
    >>> np.nancumsum([1])
    array([1])
    >>> np.nancumsum([1, np.nan])
    array([1., 1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumsum(a)
    array([1., 3., 6., 6.])
    >>> np.nancumsum(a, axis=0)
    array([[1., 2.],
           [4., 2.]])
    >>> np.nancumsum(a, axis=1)
    array([[1., 3.],
           [3., 3.]])

    """
    # Zero is the additive identity, so substituted NaNs leave the running
    # total untouched.
    filled, _ = _replace_nan(a, 0)
    return np.cumsum(filled, axis=axis, dtype=dtype, out=out)
def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None):
    """Return the array-like arguments of `nancumprod` used for dispatching."""
    return a, out
@array_function_dispatch(_nancumprod_dispatcher)
def nancumprod(a, axis=None, dtype=None, out=None):
    """
    Cumulative product of array elements over a given axis, counting NaNs
    as one.

    The running product is left unchanged by a NaN, and leading NaNs show
    up as ones. Ones are returned for slices that are all-NaN or empty.

    .. versionadded:: 1.12.0

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative product is computed. By default
        the input is flattened.
    dtype : dtype, optional
        Type of the returned array, as well as of the accumulator in which
        the elements are multiplied. If *dtype* is not specified, it
        defaults to the dtype of `a`, unless `a` has an integer dtype with
        a precision less than that of the default platform integer. In
        that case, the default platform integer is used instead.
    out : ndarray, optional
        Array that receives the result. It must have the same shape and
        buffer length as the expected output but the type of the resulting
        values will be cast if necessary.

    Returns
    -------
    nancumprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.cumprod : Cumulative product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nancumprod(1)
    array([1])
    >>> np.nancumprod([1])
    array([1])
    >>> np.nancumprod([1, np.nan])
    array([1., 1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumprod(a)
    array([1., 2., 6., 6.])
    >>> np.nancumprod(a, axis=0)
    array([[1., 2.],
           [3., 2.]])
    >>> np.nancumprod(a, axis=1)
    array([[1., 2.],
           [3., 3.]])

    """
    # One is the multiplicative identity, so substituted NaNs leave the
    # running product untouched.
    filled, _ = _replace_nan(a, 1)
    return np.cumprod(filled, axis=axis, dtype=dtype, out=out)
def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None):
    """Return the array-like arguments of `nanmean` used for dispatching."""
    return a, out
@array_function_dispatch(_nanmean_dispatcher)
def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
    """
    Arithmetic mean along the specified axis, with NaNs skipped.

    Returns the average of the array elements. The average is taken over
    the flattened array by default, otherwise over the specified axis.
    `float64` intermediate and return values are used for integer inputs.

    For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the means are computed. The default is to
        compute the mean of the flattened array.
    dtype : data-type, optional
        Type to use in computing the mean. For integer inputs, the default
        is `float64`; for inexact inputs, it is the same as the input
        dtype.
    out : ndarray, optional
        Array that receives the result. Defaults to ``None``. Its shape must
        match the expected output; the type is cast if necessary. See
        `ufuncs-output-type` for more details.
    keepdims : bool, optional
        If True, the reduced axes are kept in the result with size one, so
        that the result broadcasts correctly against the original `a`.

        Any value other than the default is passed through to the `mean` or
        `sum` methods of sub-classes of `ndarray`. If the sub-class method
        does not implement `keepdims`, its exception propagates.

    Returns
    -------
    m : ndarray, see dtype parameter above
        If `out=None`, returns a new array containing the mean values,
        otherwise a reference to the output array is returned. Nan is
        returned for slices that contain only NaNs.

    Raises
    ------
    TypeError
        If NaN replacement applies to `a` (its mask is not None) and either
        `dtype` or the dtype of `out` is not inexact.

    See Also
    --------
    average : Weighted average
    mean : Arithmetic mean taken while not ignoring NaNs
    var, nanvar

    Notes
    -----
    The arithmetic mean is the sum of the non-NaN elements along the axis
    divided by the number of non-NaN elements.

    Note that for floating-point input, the mean is computed using the same
    precision the input has. Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32`. Specifying a
    higher-precision accumulator using the `dtype` keyword can alleviate
    this issue.

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanmean(a)
    2.6666666666666665
    >>> np.nanmean(a, axis=0)
    array([2.,  4.])
    >>> np.nanmean(a, axis=1)
    array([1.,  3.5]) # may vary

    """
    filled, nan_mask = _replace_nan(a, 0)
    if nan_mask is None:
        # No NaN replacement applies to this dtype: defer to the plain mean.
        return np.mean(filled, axis=axis, dtype=dtype, out=out,
                       keepdims=keepdims)

    if dtype is not None:
        dtype = np.dtype(dtype)
        if not issubclass(dtype.type, np.inexact):
            raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # Mean = (sum with NaNs zeroed) / (number of non-NaN entries).
    valid_count = np.sum(~nan_mask, axis=axis, dtype=np.intp,
                         keepdims=keepdims)
    total = np.sum(filled, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
    mean = _divide_by_count(total, valid_count, out=out)

    if (valid_count == 0).any():
        warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=3)
        # NaN is the only possible bad value, so no further
        # action is needed to handle bad results.
    return mean
def _nanmedian1d(arr1d, overwrite_input=False):
    """
    Median of a rank 1 array with NaNs skipped (private helper).

    Returns ``np.nan`` when no element is left after the NaNs are removed.
    See nanmedian for parameter usage.
    """
    cleaned, may_overwrite = _remove_nan_1d(arr1d,
                                            overwrite_input=overwrite_input)
    if cleaned.size:
        return np.median(cleaned, overwrite_input=may_overwrite)
    # Nothing left to take a median of.
    return np.nan
def _nanmedian(a, axis=None, out=None, overwrite_input=False):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce.
    See nanmedian for parameter usage.

    Parameters
    ----------
    a : ndarray
        Input array; used as-is (``ndim``, ``shape`` and ``ravel`` are
        accessed directly).
    axis : int or None, optional
        Axis along which the median is taken. ``None`` (or a 1-d `a`)
        means the median of the flattened data.
    out : ndarray, optional
        Array that receives the result via ``out[...] = ...``.
    overwrite_input : bool, optional
        Forwarded to the 1-d / small-median helpers.

    Returns
    -------
    ndarray or scalar
        `out` whenever `out` is supplied, otherwise the freshly computed
        median(s).
    """
    if axis is None or a.ndim == 1:
        part = a.ravel()
        if out is None:
            return _nanmedian1d(part, overwrite_input)
        out[...] = _nanmedian1d(part, overwrite_input)
        return out

    # for small medians use sort + indexing which is still faster than
    # apply_along_axis
    # benchmarked with shuffled (50, 50, x) containing a few NaN
    if a.shape[axis] < 600:
        return _nanmedian_small(a, axis, out, overwrite_input)

    result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
    if out is None:
        return result
    # Return the caller's array, not the temporary, so that this branch
    # agrees with the 1-d branch above and with `_nanmedian_small`.
    out[...] = result
    return out
def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
    """
    Median via masked-array sort + indexing.

    Faster for small medians along multiple dimensions because it avoids
    the high overhead of apply_along_axis. One ``RuntimeWarning`` is issued
    for every masked entry of the masked median (i.e. per all-NaN slice),
    and those entries are reported as NaN.

    See nanmedian for parameter usage.
    """
    masked = np.ma.masked_array(a, np.isnan(a))
    med = np.ma.median(masked, axis=axis, overwrite_input=overwrite_input)

    # A masked result entry means the whole slice was NaN.
    n_all_nan = np.count_nonzero(med.mask.ravel())
    for _ in range(n_all_nan):
        warnings.warn("All-NaN slice encountered", RuntimeWarning,
                      stacklevel=4)

    filled = med.filled(np.nan)
    if out is None:
        return filled
    out[...] = filled
    return out
1017def _nanmedian_dispatcher(
1018 a, axis=None, out=None, overwrite_input=None, keepdims=None):
1019 return (a, out)
@array_function_dispatch(_nanmedian_dispatcher)
def nanmedian(a, axis=None, out=None, overwrite_input=False,
              keepdims=np._NoValue):
    """
    Compute the median along the specified axis, while ignoring NaNs.

    Returns the median of the array elements.

    .. versionadded:: 1.9.0

    Parameters
    ----------
    a : array_like
        Input data; anything that can be converted to an array.
    axis : {int, sequence of int, None}, optional
        Axis or axes over which the medians are taken. By default the
        median of the flattened array is computed. A sequence of axes
        is supported since version 1.9.0.
    out : ndarray, optional
        Array that receives the result. Its shape and buffer length must
        match the expected output; the output is cast to its type if
        necessary.
    overwrite_input : bool, optional
        If True, the memory of `a` may be used for the calculation, which
        saves memory when the contents of `a` need not be preserved.
        Afterwards `a` must be treated as undefined (it will probably be
        fully or partially sorted). Default is False. If this is ``True``
        and `a` is not already an `ndarray`, an error will be raised.
    keepdims : bool, optional
        If True, the reduced axes are kept in the result as dimensions
        of size one, so the result broadcasts correctly against the
        original `a`.

        Any value other than the default is passed through (in the
        special case of an empty array) to the `mean` function of the
        underlying array. If the array is a sub-class and `mean` does not
        have the kwarg `keepdims` this will raise a RuntimeError.

    Returns
    -------
    median : ndarray
        A new array holding the result. For integer input, or floats
        smaller than ``float64``, the output data-type is ``np.float64``;
        otherwise it is that of the input. If `out` is specified, that
        array is returned instead.

    See Also
    --------
    mean, median, percentile

    Notes
    -----
    For a vector ``V`` of length ``N``, the median is the middle value of
    a sorted copy ``V_sorted``: ``V_sorted[(N-1)/2]`` when ``N`` is odd,
    and the average of the two middle values when ``N`` is even.

    Examples
    --------
    >>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
    >>> a[0, 1] = np.nan
    >>> a
    array([[10., nan,  4.],
           [ 3.,  2.,  1.]])
    >>> np.median(a)
    nan
    >>> np.nanmedian(a)
    3.0
    >>> np.nanmedian(a, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.median(a, axis=1)
    array([nan,  2.])
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=None, overwrite_input=True)
    3.0
    >>> assert not np.all(a==b)

    """
    arr = np.asanyarray(a)
    if arr.size == 0:
        # apply_along_axis in _nanmedian doesn't handle empty arrays well,
        # so empty input is delegated to nanmean up front.
        return np.nanmean(arr, axis, out=out, keepdims=keepdims)

    reduced, kept_shape = function_base._ureduce(
        arr, func=_nanmedian, axis=axis, out=out,
        overwrite_input=overwrite_input)
    # Only an explicitly passed, truthy keepdims restores the reduced axes.
    if keepdims is not np._NoValue and keepdims:
        return reduced.reshape(kept_shape)
    return reduced
1121def _nanpercentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
1122 interpolation=None, keepdims=None):
1123 return (a, q, out)
@array_function_dispatch(_nanpercentile_dispatcher)
def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
                  interpolation='linear', keepdims=np._NoValue):
    """
    Compute the qth percentile of the data along the specified axis,
    while ignoring nan values.

    Returns the qth percentile(s) of the array elements.

    .. versionadded:: 1.9.0

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array, containing
        nan values to be ignored.
    q : array_like of float
        Percentile or sequence of percentiles to compute, which must be
        between 0 and 100 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the percentiles are computed. The
        default is to compute the percentile(s) along a flattened
        version of the array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output,
        but the type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow the input array `a` to be modified by
        intermediate calculations, to save memory. In this case, the
        contents of the input `a` after this function completes is
        undefined.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        This optional parameter specifies the interpolation method to
        use when the desired percentile lies between two data points
        ``i < j``:

        * 'linear': ``i + (j - i) * fraction``, where ``fraction``
          is the fractional part of the index surrounded by ``i``
          and ``j``.
        * 'lower': ``i``.
        * 'higher': ``j``.
        * 'nearest': ``i`` or ``j``, whichever is nearest.
        * 'midpoint': ``(i + j) / 2``.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original array `a`.

        If this is anything but the default value it will be passed
        through (in the special case of an empty array) to the
        `mean` function of the underlying array. If the array is
        a sub-class and `mean` does not have the kwarg `keepdims` this
        will raise a RuntimeError.

    Returns
    -------
    percentile : scalar or ndarray
        If `q` is a single percentile and `axis=None`, then the result
        is a scalar. If multiple percentiles are given, first axis of
        the result corresponds to the percentiles. The other axes are
        the axes that remain after the reduction of `a`. If the input
        contains integers or floats smaller than ``float64``, the output
        data-type is ``float64``. Otherwise, the output data-type is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    Raises
    ------
    ValueError
        If any value of `q` lies outside the range [0, 100].

    See Also
    --------
    nanmean
    nanmedian : equivalent to ``nanpercentile(..., 50)``
    percentile, median, mean
    nanquantile : equivalent to nanpercentile, but with q in the range [0, 1].

    Notes
    -----
    Given a vector ``V`` of length ``N``, the ``q``-th percentile of
    ``V`` is the value ``q/100`` of the way from the minimum to the
    maximum in a sorted copy of ``V``. The values and distances of
    the two nearest neighbors as well as the `interpolation` parameter
    will determine the percentile if the normalized ranking does not
    match the location of ``q`` exactly. This function is the same as
    the median if ``q=50``, the same as the minimum if ``q=0`` and the
    same as the maximum if ``q=100``.

    Examples
    --------
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[10.,  nan,   4.],
          [ 3.,   2.,   1.]])
    >>> np.percentile(a, 50)
    nan
    >>> np.nanpercentile(a, 50)
    3.0
    >>> np.nanpercentile(a, 50, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.nanpercentile(a, 50, axis=1, keepdims=True)
    array([[7.],
           [2.]])
    >>> m = np.nanpercentile(a, 50, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanpercentile(a, 50, axis=0, out=out)
    array([6.5, 2. , 2.5])
    >>> m
    array([6.5, 2. , 2.5])

    >>> b = a.copy()
    >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)

    """
    a = np.asanyarray(a)
    # true_divide converts array-likes for us, but for scalar / 0-d input
    # the ufunc hands back a NumPy scalar. Re-wrap it so that the helper
    # below really receives an ndarray, exactly as nanquantile provides.
    q = np.asanyarray(np.true_divide(q, 100.0))
    if not function_base._quantile_is_valid(q):
        raise ValueError("Percentiles must be in the range [0, 100]")
    return _nanquantile_unchecked(
        a, q, axis, out, overwrite_input, interpolation, keepdims)
1247def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
1248 interpolation=None, keepdims=None):
1249 return (a, q, out)
@array_function_dispatch(_nanquantile_dispatcher)
def nanquantile(a, q, axis=None, out=None, overwrite_input=False,
                interpolation='linear', keepdims=np._NoValue):
    """
    Compute the qth quantile of the data along the specified axis,
    while ignoring nan values.

    Returns the qth quantile(s) of the array elements.

    .. versionadded:: 1.15.0

    Parameters
    ----------
    a : array_like
        Input data, or any object convertible to an array, whose nan
        values are to be ignored.
    q : array_like of float
        One quantile or a sequence of quantiles to compute; each must lie
        between 0 and 1 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes to reduce over. By default the quantile(s) of the
        flattened array are computed.
    out : ndarray, optional
        Array that receives the result. Its shape and buffer length must
        match the expected output; the output is cast to its type if
        necessary.
    overwrite_input : bool, optional
        If True, intermediate calculations may modify `a` to save memory,
        in which case the contents of `a` are undefined once this
        function completes.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        Interpolation method used when the desired quantile falls
        between two data points ``i < j``:

        * linear: ``i + (j - i) * fraction``, where ``fraction``
          is the fractional part of the index surrounded by ``i``
          and ``j``.
        * lower: ``i``.
        * higher: ``j``.
        * nearest: ``i`` or ``j``, whichever is nearest.
        * midpoint: ``(i + j) / 2``.

    keepdims : bool, optional
        If True, the reduced axes are kept in the result as dimensions
        of size one, so the result broadcasts correctly against the
        original array `a`.

        Any value other than the default is passed through (in the
        special case of an empty array) to the `mean` function of the
        underlying array. If the array is a sub-class and `mean` does not
        have the kwarg `keepdims` this will raise a RuntimeError.

    Returns
    -------
    quantile : scalar or ndarray
        A scalar if `q` is a single quantile and `axis=None`. When
        several quantiles are requested, the first axis of the result
        indexes them and the remaining axes are those left after
        reducing `a`. Integer input, or floats smaller than ``float64``,
        give ``float64`` output; otherwise the output data-type matches
        the input. If `out` is specified, that array is returned instead.

    See Also
    --------
    quantile
    nanmean
    nanmedian : equivalent to ``nanquantile(..., 0.5)``
    nanpercentile : same as nanquantile, but with q in the range [0, 100].

    Examples
    --------
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[10.,  nan,   4.],
          [ 3.,   2.,   1.]])
    >>> np.quantile(a, 0.5)
    nan
    >>> np.nanquantile(a, 0.5)
    3.0
    >>> np.nanquantile(a, 0.5, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.nanquantile(a, 0.5, axis=1, keepdims=True)
    array([[7.],
           [2.]])
    >>> m = np.nanquantile(a, 0.5, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanquantile(a, 0.5, axis=0, out=out)
    array([6.5, 2. , 2.5])
    >>> m
    array([6.5, 2. , 2.5])
    >>> b = a.copy()
    >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)
    """
    arr = np.asanyarray(a)
    quantiles = np.asanyarray(q)
    if function_base._quantile_is_valid(quantiles):
        return _nanquantile_unchecked(
            arr, quantiles, axis, out, overwrite_input, interpolation,
            keepdims)
    raise ValueError("Quantiles must be in the range [0, 1]")
1360def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False,
1361 interpolation='linear', keepdims=np._NoValue):
1362 """Assumes that q is in [0, 1], and is an ndarray"""
1363 # apply_along_axis in _nanpercentile doesn't handle empty arrays well,
1364 # so deal them upfront
1365 if a.size == 0:
1366 return np.nanmean(a, axis, out=out, keepdims=keepdims)
1368 r, k = function_base._ureduce(
1369 a, func=_nanquantile_ureduce_func, q=q, axis=axis, out=out,
1370 overwrite_input=overwrite_input, interpolation=interpolation
1371 )
1372 if keepdims and keepdims is not np._NoValue:
1373 return r.reshape(q.shape + k)
1374 else:
1375 return r
def _nanquantile_ureduce_func(a, q, axis=None, out=None,
                              overwrite_input=False,
                              interpolation='linear'):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce.
    See nanpercentile for parameter usage.

    Returns
    -------
    quantile : scalar or ndarray
        The computed quantile(s). Whenever `out` is given, the result is
        written into it and `out` itself is returned.
    """
    if axis is None or a.ndim == 1:
        # Single reduction over all elements.
        result = _nanquantile_1d(a.ravel(), q, overwrite_input,
                                 interpolation)
    else:
        result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
                                     overwrite_input, interpolation)
        # apply_along_axis fills in collapsed axis with results.
        # Move that axis to the beginning to match percentile's
        # convention.
        if q.ndim != 0:
            result = np.moveaxis(result, axis, 0)

    if out is None:
        return result
    # Hand back the caller's array rather than the temporary, so that the
    # documented contract ("if `out` is specified, that array is returned")
    # actually holds.
    out[...] = result
    return out
def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'):
    """
    Quantile of a rank 1 array, ignoring NaNs (private helper).

    The input is first passed through `_remove_nan_1d`. If nothing is left
    afterwards, NaN is returned with the shape of `q` (a scalar for 0-d
    `q`); otherwise the remaining values go to
    `function_base._quantile_unchecked`. See nanpercentile for parameter
    usage.
    """
    cleaned, may_overwrite = _remove_nan_1d(
        arr1d, overwrite_input=overwrite_input)
    if cleaned.size != 0:
        return function_base._quantile_unchecked(
            cleaned, q, overwrite_input=may_overwrite,
            interpolation=interpolation)
    # Indexing with () converts a 0-d array to a scalar and leaves arrays
    # of higher rank untouched.
    return np.full(q.shape, np.nan)[()]
1416def _nanvar_dispatcher(
1417 a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
1418 return (a, out)
@array_function_dispatch(_nanvar_dispatcher)
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
    """
    Compute the variance along the specified axis, while ignoring NaNs.

    Returns the variance of the array elements, a measure of the spread of
    a distribution. By default the variance of the flattened array is
    computed, otherwise it is taken over the specified axis.

    For all-NaN slices or slices with zero degrees of freedom, NaN is
    returned and a `RuntimeWarning` is raised.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Numbers whose variance is desired. A conversion is attempted if
        `a` is not an array.
    axis : {int, tuple of int, None}, optional
        Axis or axes to reduce over. The default is the variance of the
        flattened array.
    dtype : data-type, optional
        Type used for the computation. Defaults to `float64` for integer
        arrays and to the array type for float arrays.
    out : ndarray, optional
        Array that receives the result. It must have the shape of the
        expected output; the type is cast if necessary.
    ddof : int, optional
        "Delta Degrees of Freedom": the divisor is ``N - ddof``, where
        ``N`` is the number of non-NaN elements. By default `ddof` is
        zero.
    keepdims : bool, optional
        If True, the reduced axes are kept in the result as dimensions
        of size one, so the result broadcasts correctly against the
        original `a`.

    Returns
    -------
    variance : ndarray, see dtype parameter above
        A new array containing the variance if `out` is None, otherwise
        a reference to the output array. A slice yields NaN when ddof is
        >= its number of non-NaN elements or when it holds only NaNs.

    Raises
    ------
    TypeError
        If NaN handling is required and `dtype` or the dtype of `out` is
        not inexact.

    See Also
    --------
    std : Standard deviation
    mean : Average
    var : Variance while not ignoring NaNs
    nanstd, nanmean
    ufuncs-output-type

    Notes
    -----
    The variance is the average of the squared deviations from the mean,
    i.e.,  ``var = mean(abs(x - x.mean())**2)``.

    The mean is normally calculated as ``x.sum() / N``, where
    ``N = len(x)``. If `ddof` is specified, the divisor ``N - ddof`` is
    used instead. In standard statistical practice, ``ddof=1`` provides
    an unbiased estimator of the variance of a hypothetical infinite
    population, while ``ddof=0`` provides a maximum likelihood estimate
    of the variance for normally distributed variables.

    For complex numbers the absolute value is taken before squaring, so
    the result is always real and nonnegative.

    For floating-point input the variance is computed in the precision
    of the input, which can make results inaccurate depending on the
    data, especially for `float32`. A higher-accuracy accumulator can be
    requested through the ``dtype`` keyword.

    For this function to work on sub-classes of ndarray, they must define
    `sum` with the kwarg `keepdims`

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanvar(a)
    1.5555555555555554
    >>> np.nanvar(a, axis=0)
    array([1.,  0.])
    >>> np.nanvar(a, axis=1)
    array([0.,  0.25])  # may vary

    """
    arr, mask = _replace_nan(a, 0)
    if mask is None:
        # No mask came back from _replace_nan: use the plain variance.
        return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims)

    if dtype is not None:
        dtype = np.dtype(dtype)
        if not issubclass(dtype.type, np.inexact):
            raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # Compute mean.
    # We need to special case matrix for reverse compatibility: for the
    # broadcasting below to work these sums must be called with
    # keepdims=True, however matrix now raises an error in that case. The
    # reason matrix drops the keepdims kwarg is to force keepdims=True, so
    # this used to work by serendipity.
    inner_keepdims = np._NoValue if type(arr) is np.matrix else True
    count = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=inner_keepdims)
    total = np.sum(arr, axis=axis, dtype=dtype, keepdims=inner_keepdims)
    mean = _divide_by_count(total, count)

    # Compute squared deviation from mean, reusing `arr` as the buffer;
    # masked positions are reset to 0 so they add nothing to the sum.
    np.subtract(arr, mean, out=arr, casting='unsafe')
    arr = _copyto(arr, 0, mask)
    if issubclass(arr.dtype.type, np.complexfloating):
        squared = np.multiply(arr, arr.conj(), out=arr).real
    else:
        squared = np.multiply(arr, arr, out=arr)

    # Compute variance.
    result = np.sum(squared, axis=axis, dtype=dtype, out=out,
                    keepdims=keepdims)
    if result.ndim < count.ndim:
        # Subclasses of ndarray may ignore keepdims, so check here.
        count = count.squeeze(axis)
    dof = count - ddof
    result = _divide_by_count(result, dof)

    bad = (dof <= 0)
    if np.any(bad):
        warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning,
                      stacklevel=3)
        # NaN, inf, or negative numbers are all possible bad
        # values, so explicitly replace them with NaN.
        result = _copyto(result, np.nan, bad)
    return result
1564def _nanstd_dispatcher(
1565 a, axis=None, dtype=None, out=None, ddof=None, keepdims=None):
1566 return (a, out)
@array_function_dispatch(_nanstd_dispatcher)
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
    """
    Compute the standard deviation along the specified axis, while
    ignoring NaNs.

    Returns the standard deviation, a measure of the spread of a
    distribution, of the non-NaN array elements. By default it is
    computed for the flattened array, otherwise over the specified axis.

    For all-NaN slices or slices with zero degrees of freedom, NaN is
    returned and a `RuntimeWarning` is raised.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Calculate the standard deviation of the non-NaN values.
    axis : {int, tuple of int, None}, optional
        Axis or axes to reduce over. The default is the standard
        deviation of the flattened array.
    dtype : dtype, optional
        Type used for the computation. Defaults to float64 for integer
        arrays and to the array type for float arrays.
    out : ndarray, optional
        Array that receives the result. It must have the shape of the
        expected output; the calculated values are cast to its type if
        necessary.
    ddof : int, optional
        Means Delta Degrees of Freedom. The divisor used in calculations
        is ``N - ddof``, where ``N`` represents the number of non-NaN
        elements. By default `ddof` is zero.

    keepdims : bool, optional
        If True, the reduced axes are kept in the result as dimensions
        of size one, so the result broadcasts correctly against the
        original `a`.

        Any value other than the default is passed through as-is to the
        relevant functions of the sub-classes. If these functions do not
        have a `keepdims` kwarg, a RuntimeError will be raised.

    Returns
    -------
    standard_deviation : ndarray, see dtype parameter above.
        A new array containing the standard deviation if `out` is None,
        otherwise a reference to the output array. A slice yields NaN
        when ddof is >= its number of non-NaN elements or when it holds
        only NaNs.

    See Also
    --------
    var, mean, std
    nanvar, nanmean
    ufuncs-output-type

    Notes
    -----
    The standard deviation is the square root of the average of the
    squared deviations from the mean:
    ``std = sqrt(mean(abs(x - x.mean())**2))``.

    The average squared deviation is normally calculated as
    ``x.sum() / N``, where ``N = len(x)``. If `ddof` is specified, the
    divisor ``N - ddof`` is used instead. In standard statistical
    practice, ``ddof=1`` provides an unbiased estimator of the variance
    of the infinite population, while ``ddof=0`` provides a maximum
    likelihood estimate of the variance for normally distributed
    variables. Because the result is the square root of the estimated
    variance, even with ``ddof=1`` it is not an unbiased estimate of the
    standard deviation per se.

    For complex numbers, `std` takes the absolute value before squaring,
    so that the result is always real and nonnegative.

    For floating-point input the *std* is computed in the precision of
    the input, which can make results inaccurate depending on the data,
    especially for float32. A higher-accuracy accumulator can be
    requested through the `dtype` keyword.

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanstd(a)
    1.247219128924647
    >>> np.nanstd(a, axis=0)
    array([1., 0.])
    >>> np.nanstd(a, axis=1)
    array([0.,  0.5]) # may vary

    """
    variance = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims)
    if not isinstance(variance, np.ndarray):
        # Scalar result: take the root and cast back to the scalar's type.
        return variance.dtype.type(np.sqrt(variance))
    # Array result: take the root in place, reusing the variance buffer
    # (which is `out` when nanvar returned it).
    return np.sqrt(variance, out=variance)