Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/arrays/boolean.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import numbers
2from typing import TYPE_CHECKING, Any, List, Tuple, Type
3import warnings
5import numpy as np
7from pandas._libs import lib, missing as libmissing
8from pandas.compat import set_function_name
9from pandas.compat.numpy import function as nv
11from pandas.core.dtypes.base import ExtensionDtype
12from pandas.core.dtypes.cast import astype_nansafe
13from pandas.core.dtypes.common import (
14 is_bool_dtype,
15 is_extension_array_dtype,
16 is_float,
17 is_float_dtype,
18 is_integer_dtype,
19 is_list_like,
20 is_numeric_dtype,
21 is_scalar,
22 pandas_dtype,
23)
24from pandas.core.dtypes.dtypes import register_extension_dtype
25from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
26from pandas.core.dtypes.missing import isna, notna
28from pandas.core import nanops, ops
29from pandas.core.indexers import check_array_indexer
31from .masked import BaseMaskedArray
33if TYPE_CHECKING:
34 from pandas._typing import Scalar
@register_extension_dtype
class BooleanDtype(ExtensionDtype):
    """
    Extension dtype for boolean data.

    .. versionadded:: 1.0.0

    .. warning::

       BooleanDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.BooleanDtype()
    BooleanDtype
    """

    # String alias of this dtype (e.g. ``dtype="boolean"``).
    name = "boolean"

    @property
    def na_value(self) -> "Scalar":
        """
        BooleanDtype uses :attr:`pandas.NA` as the missing NA value.

        .. warning::

           `na_value` may change in a future release.
        """
        return libmissing.NA

    @property
    def type(self) -> Type:
        """Scalar type of the non-missing elements (``numpy.bool_``)."""
        return np.bool_

    @property
    def kind(self) -> str:
        """Single-character kind code; ``"b"`` for boolean."""
        return "b"

    @classmethod
    def construct_array_type(cls) -> "Type[BooleanArray]":
        """Return the array class associated with this dtype."""
        return BooleanArray

    def __repr__(self) -> str:
        return "BooleanDtype"

    @property
    def _is_boolean(self) -> bool:
        """Flag this dtype as boolean."""
        return True

    def __from_arrow__(self, array):
        """
        Construct BooleanArray from passed pyarrow Array/ChunkedArray.

        Parameters
        ----------
        array : pyarrow.Array or pyarrow.ChunkedArray

        Returns
        -------
        BooleanArray
            One array holding the values of all chunks; an empty
            BooleanArray when `array` has no chunks at all.
        """
        import pyarrow

        if isinstance(array, pyarrow.Array):
            chunks = [array]
        else:
            # pyarrow.ChunkedArray
            chunks = array.chunks

        # TODO should optimize this without going through object array
        results = [BooleanArray._from_sequence(np.array(arr)) for arr in chunks]

        if not results:
            # A ChunkedArray may legitimately have zero chunks.  Build the
            # empty result directly instead of handing an empty list to
            # `_concat_same_type` (inherited, not shown here; concatenating
            # zero arrays is expected to fail).
            return BooleanArray(
                np.array([], dtype=np.bool_), np.array([], dtype=np.bool_)
            )
        return BooleanArray._concat_same_type(results)
def coerce_to_array(values, mask=None, copy: bool = False):
    """
    Turn bool-like input into a boolean data array and a missing-value mask.

    Parameters
    ----------
    values : 1D list-like
        A BooleanArray, a NumPy array or any other list-like. Numeric
        entries are accepted only when they survive a round trip through
        ``bool`` unchanged.
    mask : bool 1D array, optional
        Additional positions to mark as missing; it is or-ed with the
        missing values detected in `values`. Not allowed together with
        BooleanArray input.
    copy : bool, default False
        If True, copy inputs that would otherwise be passed through as-is.

    Returns
    -------
    tuple of (values, mask)

    Raises
    ------
    TypeError
        If `values` holds entries that are not bool-like.
    ValueError
        If a mask is given for BooleanArray input, or if the resulting
        values or mask are not one-dimensional.
    """
    # BooleanArray input already carries both pieces; hand them back.
    if isinstance(values, BooleanArray):
        if mask is not None:
            raise ValueError("cannot pass mask for BooleanArray input")
        data, na_mask = values._data, values._mask
        if copy:
            return data.copy(), na_mask.copy()
        return data, na_mask

    # Missing-value positions found in `values`; stays None when the input
    # is a plain boolean ndarray.
    detected_na = None
    is_ndarray = isinstance(values, np.ndarray)

    if is_ndarray and values.dtype == np.bool_:
        if copy:
            values = values.copy()
    elif is_ndarray and is_numeric_dtype(values.dtype):
        detected_na = isna(values)
        present = ~detected_na

        as_bool = np.zeros(len(values), dtype=bool)
        as_bool[present] = values[present].astype(bool)

        # Casting back to the original dtype must reproduce the input.
        round_trip_ok = np.all(
            as_bool[present].astype(values.dtype) == values[present]
        )
        if not round_trip_ok:
            raise TypeError("Need to pass bool-like values")

        values = as_bool
    else:
        as_object = np.asarray(values, dtype=object)

        numeric_kinds = ("floating", "integer", "mixed-integer-float")
        kind = lib.infer_dtype(as_object, skipna=True)
        if kind not in ("boolean", "empty") + numeric_kinds:
            raise TypeError("Need to pass bool-like values")

        detected_na = isna(as_object)
        present = ~detected_na
        values = np.zeros(len(values), dtype=bool)
        values[present] = as_object[present].astype(bool)

        # Numeric-looking input is only valid when it consists of 0s and 1s.
        if kind in numeric_kinds and not np.all(
            values[present].astype(float) == as_object[present].astype(float)
        ):
            raise TypeError("Need to pass bool-like values")

    # Merge the user-supplied mask with the detected missing values.
    if mask is None:
        if detected_na is None:
            mask = np.zeros(len(values), dtype=bool)
        else:
            mask = detected_na
    else:
        mask_is_bool_ndarray = isinstance(mask, np.ndarray) and mask.dtype == np.bool_
        if not mask_is_bool_ndarray:
            mask = np.array(mask, dtype=bool)
        if detected_na is not None:
            mask = mask | detected_na
        elif mask_is_bool_ndarray and copy:
            mask = mask.copy()

    if values.ndim != 1:
        raise ValueError("values must be a 1D list-like")
    if mask.ndim != 1:
        raise ValueError("mask must be a 1D list-like")

    return values, mask
class BooleanArray(BaseMaskedArray):
    """
    Array of boolean (True/False) data with missing values.

    This is a pandas Extension array for boolean data, under the hood
    represented by 2 numpy arrays: a boolean array with the data and
    a boolean array with the mask (True indicating missing).

    BooleanArray implements Kleene logic (sometimes called three-value
    logic) for logical operations. See :ref:`boolean.kleene` for more.

    To construct a BooleanArray from generic array-like input, use
    :func:`pandas.array` specifying ``dtype="boolean"`` (see examples
    below).

    .. versionadded:: 1.0.0

    .. warning::

       BooleanArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : numpy.ndarray
        A 1-d boolean-dtype array with the data.
    mask : numpy.ndarray
        A 1-d boolean-dtype array indicating missing values (True
        indicates missing).
    copy : bool, default False
        Whether to copy the `values` and `mask` arrays.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Returns
    -------
    BooleanArray

    Examples
    --------
    Create a BooleanArray with :func:`pandas.array`:

    >>> pd.array([True, False, None], dtype="boolean")
    <BooleanArray>
    [True, False, <NA>]
    Length: 3, dtype: boolean
    """

    # The value used to fill '_data' to avoid upcasting
    _internal_fill_value = False

    def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
        # Only ready-made 1-d boolean ndarrays are accepted here; anything
        # looser has to go through the 'array' function.
        if not (isinstance(values, np.ndarray) and values.dtype == np.bool_):
            raise TypeError(
                "values should be boolean numpy array. Use "
                "the 'array' function instead"
            )
        if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
            raise TypeError(
                "mask should be boolean numpy array. Use "
                "the 'array' function instead"
            )
        if not values.ndim == 1:
            raise ValueError("values must be a 1D array")
        if not mask.ndim == 1:
            raise ValueError("mask must be a 1D array")

        if copy:
            values = values.copy()
            mask = mask.copy()

        self._data = values
        self._mask = mask
        self._dtype = BooleanDtype()

    @property
    def dtype(self):
        """The BooleanDtype instance created for this array."""
        return self._dtype

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy: bool = False):
        """Build a BooleanArray from bool-like scalars via `coerce_to_array`."""
        if dtype:
            assert dtype == "boolean"
        values, mask = coerce_to_array(scalars, copy=copy)
        return BooleanArray(values, mask)

    @classmethod
    def _from_sequence_of_strings(
        cls, strings: List[str], dtype=None, copy: bool = False
    ):
        """
        Build a BooleanArray from strings.

        Missing values are passed through; "True"/"TRUE"/"true" and
        "False"/"FALSE"/"false" are mapped to booleans; any other string
        raises ValueError.
        """

        def map_string(s):
            if isna(s):
                return s
            elif s in ["True", "TRUE", "true"]:
                return True
            elif s in ["False", "FALSE", "false"]:
                return False
            else:
                raise ValueError(f"{s} cannot be cast to bool")

        scalars = [map_string(x) for x in strings]
        return cls._from_sequence(scalars, dtype, copy)

    def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
        """Return an int8 copy of the data with missing slots set to -1, and -1."""
        data = self._data.astype("int8")
        data[self._mask] = -1
        return data, -1

    @classmethod
    def _from_factorized(cls, values, original: "BooleanArray"):
        """Rebuild an array from factorized `values` using `original`'s dtype."""
        return cls._from_sequence(values, dtype=original.dtype)

    # Operand types `__array_ufunc__` is willing to handle (besides BooleanArray).
    _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        """
        Apply a NumPy ufunc to the underlying data and re-apply the mask.

        Returns
        -------
        BooleanArray, numpy.ndarray, tuple of those, or NotImplemented
            Boolean results are wrapped in a BooleanArray; other results get
            NaN written at the masked positions. Ufuncs with several outputs
            yield a tuple with one such object per output.

        Raises
        ------
        NotImplementedError
            For the 'reduce' method.
        """
        # For BooleanArray inputs, we apply the ufunc to ._data
        # and mask the result.
        if method == "reduce":
            # Not clear how to handle missing values in reductions. Raise.
            raise NotImplementedError("The 'reduce' method is not supported.")
        out = kwargs.get("out", ())

        for x in inputs + out:
            if not isinstance(x, self._HANDLED_TYPES + (BooleanArray,)):
                return NotImplemented

        # for binary ops, use our custom dunder methods
        result = ops.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        # Combined mask of every BooleanArray input; plain data goes to NumPy.
        mask = np.zeros(len(self), dtype=bool)
        inputs2 = []
        for x in inputs:
            if isinstance(x, BooleanArray):
                mask |= x._mask
                inputs2.append(x._data)
            else:
                inputs2.append(x)

        def reconstruct(x):
            # we don't worry about scalar `x` here, since we
            # raise for reduce up above.

            if is_bool_dtype(x.dtype):
                m = mask.copy()
                return BooleanArray(x, m)
            else:
                x[mask] = np.nan
            return x

        result = getattr(ufunc, method)(*inputs2, **kwargs)
        if isinstance(result, tuple):
            # Multi-output ufuncs: the reconstructed tuple must be returned
            # (previously it was built and dropped, yielding None).
            return tuple(reconstruct(x) for x in result)
        else:
            return reconstruct(result)

    def __setitem__(self, key, value):
        # Scalars are wrapped in a list so coerce_to_array can validate them,
        # then unwrapped again before assignment.
        _is_scalar = is_scalar(value)
        if _is_scalar:
            value = [value]
        value, mask = coerce_to_array(value)

        if _is_scalar:
            value = value[0]
            mask = mask[0]

        key = check_array_indexer(self, key)
        self._data[key] = value
        self._mask[key] = mask

    def astype(self, dtype, copy=True):
        """
        Cast to a NumPy array or ExtensionArray with 'dtype'.

        Parameters
        ----------
        dtype : str or dtype
            Typecode or data-type to which the array is cast.
        copy : bool, default True
            Whether to copy the data, even if not necessary. If False,
            a copy is made only if the old dtype does not match the
            new dtype.

        Returns
        -------
        array : ndarray or ExtensionArray
            NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype.

        Raises
        ------
        TypeError
            if incompatible type with a BooleanDtype, equivalent of same_kind
            casting
        ValueError
            if the array has missing values and `dtype` is a NumPy bool or
            integer dtype
        """
        dtype = pandas_dtype(dtype)

        if isinstance(dtype, BooleanDtype):
            values, mask = coerce_to_array(self, copy=copy)
            return BooleanArray(values, mask, copy=False)

        if is_bool_dtype(dtype):
            # astype_nansafe converts np.nan to True
            if self._hasna:
                raise ValueError("cannot convert float NaN to bool")
            else:
                return self._data.astype(dtype, copy=copy)
        if is_extension_array_dtype(dtype) and is_integer_dtype(dtype):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(
                self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False
            )
        # for integer, error if there are missing values
        if is_integer_dtype(dtype):
            if self._hasna:
                raise ValueError("cannot convert NA to integer")
        # for float dtype, ensure we use np.nan before casting (numpy cannot
        # deal with pd.NA)
        na_value = self._na_value
        if is_float_dtype(dtype):
            na_value = np.nan
        # coerce
        data = self.to_numpy(na_value=na_value)
        return astype_nansafe(data, dtype, copy=False)

    def _values_for_argsort(self) -> np.ndarray:
        """
        Return values for sorting.

        Returns
        -------
        ndarray
            The transformed values should maintain the ordering between values
            within the array.

        See Also
        --------
        ExtensionArray.argsort
        """
        data = self._data.copy()
        # NOTE(review): `data` is a boolean array (enforced in __init__), so
        # the -1 written here is stored as True, not as a distinct sentinel;
        # confirm that this is the intended sort position for missing values.
        data[self._mask] = -1
        return data

    def any(self, skipna: bool = True, **kwargs):
        """
        Return whether any element is True.

        Returns False unless there is at least one element that is True.
        By default, NAs are skipped. If ``skipna=False`` is specified and
        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
        is used as for logical operations.

        Parameters
        ----------
        skipna : bool, default True
            Exclude NA values. If the entire array is NA and `skipna` is
            True, then the result will be False, as for an empty array.
            If `skipna` is False, the result will still be True if there is
            at least one element that is True, otherwise NA will be returned
            if there are NA's present.
        **kwargs : any, default None
            Additional keywords have no effect but might be accepted for
            compatibility with NumPy.

        Returns
        -------
        bool or :attr:`pandas.NA`

        See Also
        --------
        numpy.any : Numpy version of this method.
        BooleanArray.all : Return whether all elements are True.

        Examples
        --------

        The result indicates whether any element is True (and by default
        skips NAs):

        >>> pd.array([True, False, True]).any()
        True
        >>> pd.array([True, False, pd.NA]).any()
        True
        >>> pd.array([False, False, pd.NA]).any()
        False
        >>> pd.array([], dtype="boolean").any()
        False
        >>> pd.array([pd.NA], dtype="boolean").any()
        False

        With ``skipna=False``, the result can be NA if this is logically
        required (whether ``pd.NA`` is True or False influences the result):

        >>> pd.array([True, False, pd.NA]).any(skipna=False)
        True
        >>> pd.array([False, False, pd.NA]).any(skipna=False)
        <NA>
        """
        kwargs.pop("axis", None)
        nv.validate_any((), kwargs)

        # Treat missing slots as False so they cannot make the result True.
        values = self._data.copy()
        np.putmask(values, self._mask, False)
        result = values.any()
        if skipna:
            return result
        else:
            if result or len(self) == 0:
                return result
            else:
                return self.dtype.na_value

    def all(self, skipna: bool = True, **kwargs):
        """
        Return whether all elements are True.

        Returns True unless there is at least one element that is False.
        By default, NAs are skipped. If ``skipna=False`` is specified and
        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
        is used as for logical operations.

        Parameters
        ----------
        skipna : bool, default True
            Exclude NA values. If the entire array is NA and `skipna` is
            True, then the result will be True, as for an empty array.
            If `skipna` is False, the result will still be False if there is
            at least one element that is False, otherwise NA will be returned
            if there are NA's present.
        **kwargs : any, default None
            Additional keywords have no effect but might be accepted for
            compatibility with NumPy.

        Returns
        -------
        bool or :attr:`pandas.NA`

        See Also
        --------
        numpy.all : Numpy version of this method.
        BooleanArray.any : Return whether any element is True.

        Examples
        --------

        The result indicates whether all elements are True (and by default
        skips NAs):

        >>> pd.array([True, True, pd.NA]).all()
        True
        >>> pd.array([True, False, pd.NA]).all()
        False
        >>> pd.array([], dtype="boolean").all()
        True
        >>> pd.array([pd.NA], dtype="boolean").all()
        True

        With ``skipna=False``, the result can be NA if this is logically
        required (whether ``pd.NA`` is True or False influences the result):

        >>> pd.array([True, True, pd.NA]).all(skipna=False)
        <NA>
        >>> pd.array([True, False, pd.NA]).all(skipna=False)
        False
        """
        kwargs.pop("axis", None)
        nv.validate_all((), kwargs)

        # Treat missing slots as True so they cannot make the result False.
        values = self._data.copy()
        np.putmask(values, self._mask, True)
        result = values.all()

        if skipna:
            return result
        else:
            if not result or len(self) == 0:
                return result
            else:
                return self.dtype.na_value

    @classmethod
    def _create_logical_method(cls, op):
        """Create the dunder method that applies `op` (and/or/xor) with Kleene logic."""

        def logical_method(self, other):
            if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
                # Rely on pandas to unbox and dispatch to us.
                return NotImplemented

            assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
            other = lib.item_from_zerodim(other)
            other_is_booleanarray = isinstance(other, BooleanArray)
            other_is_scalar = lib.is_scalar(other)
            mask = None

            if other_is_booleanarray:
                other, mask = other._data, other._mask
            elif is_list_like(other):
                other = np.asarray(other, dtype="bool")
                if other.ndim > 1:
                    raise NotImplementedError(
                        "can only perform ops with 1-d structures"
                    )
                other, mask = coerce_to_array(other, copy=False)
            elif isinstance(other, np.bool_):
                other = other.item()

            if other_is_scalar and not (other is libmissing.NA or lib.is_bool(other)):
                raise TypeError(
                    "'other' should be pandas.NA or a bool. "
                    f"Got {type(other).__name__} instead."
                )

            if not other_is_scalar and len(self) != len(other):
                raise ValueError("Lengths must match to compare")

            if op.__name__ in {"or_", "ror_"}:
                result, mask = ops.kleene_or(self._data, other, self._mask, mask)
            elif op.__name__ in {"and_", "rand_"}:
                result, mask = ops.kleene_and(self._data, other, self._mask, mask)
            elif op.__name__ in {"xor", "rxor"}:
                result, mask = ops.kleene_xor(self._data, other, self._mask, mask)

            return BooleanArray(result, mask)

        name = f"__{op.__name__}__"
        return set_function_name(logical_method, name, cls)

    @classmethod
    def _create_comparison_method(cls, op):
        """Create the dunder method that compares element-wise with `op`."""

        def cmp_method(self, other):
            from pandas.arrays import IntegerArray

            if isinstance(
                other, (ABCDataFrame, ABCSeries, ABCIndexClass, IntegerArray)
            ):
                # Rely on pandas to unbox and dispatch to us.
                return NotImplemented

            other = lib.item_from_zerodim(other)
            mask = None

            if isinstance(other, BooleanArray):
                other, mask = other._data, other._mask

            elif is_list_like(other):
                other = np.asarray(other)
                if other.ndim > 1:
                    raise NotImplementedError(
                        "can only perform ops with 1-d structures"
                    )
                if len(self) != len(other):
                    raise ValueError("Lengths must match to compare")

            if other is libmissing.NA:
                # numpy does not handle pd.NA well as "other" scalar (it returns
                # a scalar False instead of an array)
                result = np.zeros_like(self._data)
                mask = np.ones_like(self._data)
            else:
                # numpy will show a DeprecationWarning on invalid elementwise
                # comparisons, this will raise in the future
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", "elementwise", FutureWarning)
                    with np.errstate(all="ignore"):
                        result = op(self._data, other)

                # nans propagate
                if mask is None:
                    mask = self._mask.copy()
                else:
                    mask = self._mask | mask

            return BooleanArray(result, mask, copy=False)

        # Full dunder name (e.g. "__eq__"), matching the logical and
        # arithmetic factories; the trailing underscores used to be missing.
        name = f"__{op.__name__}__"
        return set_function_name(cmp_method, name, cls)

    def _reduce(self, name, skipna=True, **kwargs):
        """
        Run the reduction `name` over the array.

        "any" and "all" are delegated to the methods of the same name; every
        other name is looked up as ``nanops.nan<name>``. A NaN result is
        returned as :attr:`pandas.NA`.
        """

        if name in {"any", "all"}:
            return getattr(self, name)(skipna=skipna, **kwargs)

        data = self._data
        mask = self._mask

        # coerce to a nan-aware float if needed
        if self._hasna:
            data = self.to_numpy("float64", na_value=np.nan)

        op = getattr(nanops, "nan" + name)
        result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)

        if np.isnan(result):
            return libmissing.NA

        # if we have numeric op that would result in an int, coerce to int if possible
        if name in ["sum", "prod"] and notna(result):
            int_result = np.int64(result)
            if int_result == result:
                result = int_result

        elif name in ["min", "max"] and notna(result):
            result = np.bool_(result)

        return result

    def _maybe_mask_result(self, result, mask, other, op_name):
        """
        Wrap the raw result of an arithmetic op and apply the mask.

        Parameters
        ----------
        result : array-like
        mask : array-like bool
        other : scalar or array-like
        op_name : str

        Returns
        -------
        numpy.ndarray, BooleanArray or IntegerArray
            `result` with NaN at masked positions for float operands, true
            division and any other non-bool/non-integer result; otherwise a
            BooleanArray or IntegerArray built from `result` and `mask`.
        """
        # if we have a float operand we are by-definition
        # a float result
        # or our op is a divide
        if (is_float_dtype(other) or is_float(other)) or (
            op_name in ["rtruediv", "truediv"]
        ):
            result[mask] = np.nan
            return result

        if is_bool_dtype(result):
            return BooleanArray(result, mask, copy=False)

        elif is_integer_dtype(result):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(result, mask, copy=False)
        else:
            result[mask] = np.nan
            return result

    @classmethod
    def _create_arithmetic_method(cls, op):
        """Create the dunder method that applies the arithmetic `op`."""
        op_name = op.__name__

        def boolean_arithmetic_method(self, other):

            if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
                # Rely on pandas to unbox and dispatch to us.
                return NotImplemented

            other = lib.item_from_zerodim(other)
            mask = None

            if isinstance(other, BooleanArray):
                other, mask = other._data, other._mask

            elif is_list_like(other):
                other = np.asarray(other)
                if other.ndim > 1:
                    raise NotImplementedError(
                        "can only perform ops with 1-d structures"
                    )
                if len(self) != len(other):
                    raise ValueError("Lengths must match")

            # nans propagate
            if mask is None:
                # Copy (as the comparison method does): the mask is handed to
                # the result array with copy=False, so sharing it would let
                # later writes to the result alter this array's mask.
                mask = self._mask.copy()
            else:
                mask = self._mask | mask

            with np.errstate(all="ignore"):
                result = op(self._data, other)

            # divmod returns a tuple
            if op_name == "divmod":
                div, mod = result
                return (
                    self._maybe_mask_result(div, mask, other, "floordiv"),
                    self._maybe_mask_result(mod, mask, other, "mod"),
                )

            return self._maybe_mask_result(result, mask, other, op_name)

        name = f"__{op_name}__"
        return set_function_name(boolean_arithmetic_method, name, cls)
# Module-level calls, executed once when this module is imported.
# NOTE(review): the `_add_*_ops` hooks are not defined in this file; they are
# presumably inherited and expected to use the `_create_logical_method`,
# `_create_comparison_method` and `_create_arithmetic_method` factories on
# BooleanArray to attach its operator methods -- confirm in the base classes.
BooleanArray._add_logical_ops()
BooleanArray._add_comparison_ops()
BooleanArray._add_arithmetic_ops()