Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/arrays/interval.py : 21%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from operator import le, lt
2import textwrap
4import numpy as np
6from pandas._config import get_option
8from pandas._libs.interval import Interval, IntervalMixin, intervals_to_interval_bounds
9from pandas.compat.numpy import function as nv
10from pandas.util._decorators import Appender
12from pandas.core.dtypes.cast import maybe_convert_platform
13from pandas.core.dtypes.common import (
14 is_categorical_dtype,
15 is_datetime64_any_dtype,
16 is_float_dtype,
17 is_integer_dtype,
18 is_interval,
19 is_interval_dtype,
20 is_list_like,
21 is_object_dtype,
22 is_scalar,
23 is_string_dtype,
24 is_timedelta64_dtype,
25 pandas_dtype,
26)
27from pandas.core.dtypes.dtypes import IntervalDtype
28from pandas.core.dtypes.generic import (
29 ABCDatetimeIndex,
30 ABCExtensionArray,
31 ABCIndexClass,
32 ABCInterval,
33 ABCIntervalIndex,
34 ABCPeriodIndex,
35 ABCSeries,
36)
37from pandas.core.dtypes.missing import isna, notna
39from pandas.core.algorithms import take, value_counts
40from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs
41from pandas.core.arrays.categorical import Categorical
42import pandas.core.common as com
43from pandas.core.construction import array
44from pandas.core.indexers import check_array_indexer
45from pandas.core.indexes.base import ensure_index
# Accepted values for the ``closed`` option of an interval container.
_VALID_CLOSED = {"left", "right", "both", "neither"}

# Registry of docstring templates; entries are %-formatted with per-class
# substitutions before being attached to classes and methods.
_interval_shared_docs = {}

# Substitutions used when a template is rendered for IntervalArray.
_shared_docs_kwargs = {
    "klass": "IntervalArray",
    "qualname": "arrays.IntervalArray",
    "name": "",
}


# Class-level docstring template.  Placeholders: summary, versionadded,
# klass, name, extra_attributes, extra_methods, examples.
_interval_shared_docs["class"] = """
%(summary)s

.. versionadded:: %(versionadded)s

Parameters
----------
data : array-like (1-dimensional)
    Array-like containing Interval objects from which to build the
    %(klass)s.
closed : {'left', 'right', 'both', 'neither'}, default 'right'
    Whether the intervals are closed on the left-side, right-side, both or
    neither.
dtype : dtype or None, default None
    If None, dtype will be inferred.

    .. versionadded:: 0.23.0
copy : bool, default False
    Copy the input data.
%(name)s\
verify_integrity : bool, default True
    Verify that the %(klass)s is valid.

Attributes
----------
left
right
closed
mid
length
is_empty
is_non_overlapping_monotonic
%(extra_attributes)s\

Methods
-------
from_arrays
from_tuples
from_breaks
contains
overlaps
set_closed
to_tuples
%(extra_methods)s\

See Also
--------
Index : The base pandas Index type.
Interval : A bounded slice-like interval; the elements of an %(klass)s.
interval_range : Function to create a fixed frequency IntervalIndex.
cut : Bin values into discrete Intervals.
qcut : Bin values into equal-sized Intervals based on rank or sample quantiles.

Notes
-----
See the `user guide
<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`_
for more.

%(examples)s\
"""
120@Appender(
121 _interval_shared_docs["class"]
122 % dict(
123 klass="IntervalArray",
124 summary="Pandas array for interval data that are closed on the same side.",
125 versionadded="0.24.0",
126 name="",
127 extra_attributes="",
128 extra_methods="",
129 examples=textwrap.dedent(
130 """\
131 Examples
132 --------
133 A new ``IntervalArray`` can be constructed directly from an array-like of
134 ``Interval`` objects:
136 >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
137 <IntervalArray>
138 [(0, 1], (1, 5]]
139 Length: 2, closed: right, dtype: interval[int64]
141 It may also be constructed using one of the constructor
142 methods: :meth:`IntervalArray.from_arrays`,
143 :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`.
144 """
145 ),
146 )
147)
148class IntervalArray(IntervalMixin, ExtensionArray):
149 ndim = 1
150 can_hold_na = True
151 _na_value = _fill_value = np.nan
def __new__(cls, data, closed=None, dtype=None, copy=False, verify_integrity=True):
    """
    Build an instance from interval-like data.

    An interval-dtype Series is unwrapped to its values.  Data that is
    already an instance of ``cls`` or an IntervalIndex supplies its own
    bounds and ``closed``; anything else is split into bounds with
    ``intervals_to_interval_bounds``.  An explicit ``closed`` wins over
    the one taken from the data.

    Raises
    ------
    TypeError
        If `data` is a scalar.
    """
    if isinstance(data, ABCSeries) and is_interval_dtype(data):
        data = data.values

    interval_like = isinstance(data, (cls, ABCIntervalIndex))

    # scalars are rejected; a collection is required
    if not interval_like and is_scalar(data):
        raise TypeError(
            f"{cls.__name__}(...) must be called with a collection "
            f"of some kind, {data} was passed"
        )

    if interval_like:
        left, right = data.left, data.right
        data_closed = data.closed
    else:
        # might need to convert empty or purely na data
        data = maybe_convert_platform_interval(data)
        left, right, data_closed = intervals_to_interval_bounds(
            data, validate_closed=closed is None
        )

    return cls._simple_new(
        left,
        right,
        closed or data_closed,
        copy=copy,
        dtype=dtype,
        verify_integrity=verify_integrity,
    )
@classmethod
def _simple_new(
    cls, left, right, closed=None, copy=False, dtype=None, verify_integrity=True
):
    """
    Assemble an instance from left/right bounds.

    Both bounds are converted with ``ensure_index``, optionally cast to
    the subtype of `dtype`, and an integer side is cast to match a float
    side.  ``closed`` defaults to "right".  ``_validate`` is run on the
    result only when `verify_integrity` is true.

    Raises
    ------
    TypeError
        If `dtype` is not an interval dtype, or the bounds have a
        categorical or string dtype.
    ValueError
        If the bounds differ in type, are PeriodIndex, or are
        DatetimeIndex with different time zones.
    """
    result = IntervalMixin.__new__(cls)

    if not closed:
        closed = "right"
    left = ensure_index(left, copy=copy)
    right = ensure_index(right, copy=copy)

    if dtype is not None:
        # GH 19262: dtype must be an IntervalDtype to override inferred
        dtype = pandas_dtype(dtype)
        if not is_interval_dtype(dtype):
            raise TypeError(f"dtype must be an IntervalDtype, got {dtype}")
        subtype = dtype.subtype
        if subtype is not None:
            left = left.astype(subtype)
            right = right.astype(subtype)

    # coerce dtypes to match if needed
    if is_float_dtype(left) and is_integer_dtype(right):
        right = right.astype(left.dtype)
    elif is_float_dtype(right) and is_integer_dtype(left):
        left = left.astype(right.dtype)

    # Each guard raises, so only the first failing check is reported.
    if type(left) is not type(right):
        raise ValueError(
            f"must not have differing left [{type(left).__name__}] and "
            f"right [{type(right).__name__}] types"
        )
    if is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
        # GH 19016
        raise TypeError(
            "category, object, and string subtypes are not supported "
            "for IntervalArray"
        )
    if isinstance(left, ABCPeriodIndex):
        raise ValueError("Period dtypes are not supported, use a PeriodIndex instead")
    if isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz):
        raise ValueError(
            "left and right must have the same time zone, got "
            f"'{left.tz}' and '{right.tz}'"
        )

    result._left = left
    result._right = right
    result._closed = closed
    if verify_integrity:
        result._validate()
    return result
@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
    """Construct an instance by passing `scalars` to the class constructor."""
    built = cls(scalars, dtype=dtype, copy=copy)
    return built
@classmethod
def _from_factorized(cls, values, original):
    """
    Rebuild an instance from factorized `values`.

    The result takes its ``closed`` from `original`.
    """
    if not len(values):
        # An empty array returns object-dtype here. We can't create
        # a new IA from an (empty) object-dtype array, so turn it into the
        # correct dtype.
        values = values.astype(original.dtype.subtype)
    return cls(values, closed=original.closed)
# Docstring template for ``from_breaks``; placeholders: klass, examples.
_interval_shared_docs["from_breaks"] = textwrap.dedent(
    """
    Construct an %(klass)s from an array of splits.

    Parameters
    ----------
    breaks : array-like (1-dimensional)
        Left and right bounds for each interval.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.
    copy : bool, default False
        Copy the data.
    dtype : dtype or None, default None
        If None, dtype will be inferred.

        .. versionadded:: 0.23.0

    Returns
    -------
    %(klass)s

    See Also
    --------
    interval_range : Function to create a fixed frequency IntervalIndex.
    %(klass)s.from_arrays : Construct from a left and right array.
    %(klass)s.from_tuples : Construct from a sequence of tuples.

    %(examples)s\
    """
)

@classmethod
@Appender(
    _interval_shared_docs["from_breaks"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
            Examples
            --------
            >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
            <IntervalArray>
            [(0, 1], (1, 2], (2, 3]]
            Length: 3, closed: right, dtype: interval[int64]
            """
        ),
    }
)
def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
    # Consecutive break points bound each interval: all but the last break
    # are left edges, all but the first are right edges.
    breaks = maybe_convert_platform_interval(breaks)
    left_edges = breaks[:-1]
    right_edges = breaks[1:]
    return cls.from_arrays(left_edges, right_edges, closed, copy=copy, dtype=dtype)
# Docstring template for ``from_arrays``; placeholders: klass, examples.
_interval_shared_docs["from_arrays"] = textwrap.dedent(
    """
    Construct from two arrays defining the left and right bounds.

    Parameters
    ----------
    left : array-like (1-dimensional)
        Left bounds for each interval.
    right : array-like (1-dimensional)
        Right bounds for each interval.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.
    copy : bool, default False
        Copy the data.
    dtype : dtype, optional
        If None, dtype will be inferred.

        .. versionadded:: 0.23.0

    Returns
    -------
    %(klass)s

    Raises
    ------
    ValueError
        When a value is missing in only one of `left` or `right`.
        When a value in `left` is greater than the corresponding value
        in `right`.

    See Also
    --------
    interval_range : Function to create a fixed frequency IntervalIndex.
    %(klass)s.from_breaks : Construct an %(klass)s from an array of
        splits.
    %(klass)s.from_tuples : Construct an %(klass)s from an
        array-like of tuples.

    Notes
    -----
    Each element of `left` must be less than or equal to the `right`
    element at the same position. If an element is missing, it must be
    missing in both `left` and `right`. A TypeError is raised when
    using an unsupported type for `left` or `right`. At the moment,
    'category', 'object', and 'string' subtypes are not supported.

    %(examples)s\
    """
)

@classmethod
@Appender(
    _interval_shared_docs["from_arrays"]
    % dict(
        klass="IntervalArray",
        # The "Examples" header is required: without it the doctest below
        # is rendered as part of the preceding Notes section.
        examples=textwrap.dedent(
            """\
            Examples
            --------
            >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
            <IntervalArray>
            [(0, 1], (1, 2], (2, 3]]
            Length: 3, closed: right, dtype: interval[int64]
            """
        ),
    )
)
def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
    left = maybe_convert_platform_interval(left)
    right = maybe_convert_platform_interval(right)

    # Integrity is always verified for user-supplied bounds.
    return cls._simple_new(
        left, right, closed, copy=copy, dtype=dtype, verify_integrity=True
    )
# Docstring template for ``from_tuples``; placeholders: klass, examples.
_interval_shared_docs["from_tuples"] = textwrap.dedent(
    """
    Construct an %(klass)s from an array-like of tuples.

    Parameters
    ----------
    data : array-like (1-dimensional)
        Array of tuples.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.
    copy : bool, default False
        By-default copy the data, this is compat only and ignored.
    dtype : dtype or None, default None
        If None, dtype will be inferred.

        .. versionadded:: 0.23.0

    Returns
    -------
    %(klass)s

    See Also
    --------
    interval_range : Function to create a fixed frequency IntervalIndex.
    %(klass)s.from_arrays : Construct an %(klass)s from a left and
        right array.
    %(klass)s.from_breaks : Construct an %(klass)s from an array of
        splits.

    %(examples)s\
    """
)

@classmethod
@Appender(
    _interval_shared_docs["from_tuples"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
            Examples
            --------
            >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
            <IntervalArray>
            [(0, 1], (1, 2]]
            Length: 2, closed: right, dtype: interval[int64]
            """
        ),
    }
)
def from_tuples(cls, data, closed="right", copy=False, dtype=None):
    if not len(data):
        # ensure that empty data keeps input dtype
        return cls.from_arrays(data, data, closed, copy=False, dtype=dtype)

    klass_name = cls.__name__
    lefts, rights = [], []
    for item in data:
        if isna(item):
            # a missing entry is missing on both sides
            lo = hi = np.nan
        else:
            try:
                # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...]
                lo, hi = item
            except ValueError:
                msg = f"{klass_name}.from_tuples requires tuples of length 2, got {item}"
                raise ValueError(msg)
            except TypeError:
                msg = f"{klass_name}.from_tuples received an invalid item, {item}"
                raise TypeError(msg)
        lefts.append(lo)
        rights.append(hi)

    return cls.from_arrays(lefts, rights, closed, copy=False, dtype=dtype)
def _validate(self):
    """
    Check the internal consistency of the array.

    Raises
    ------
    ValueError
        If ``closed`` is not a recognised option, if left and right differ
        in length, if a value is missing on only one side, or if any
        non-missing left bound exceeds its right bound.
    """
    if self.closed not in _VALID_CLOSED:
        raise ValueError(f"invalid option for 'closed': {self.closed}")

    if len(self.left) != len(self.right):
        raise ValueError("left and right must have the same length")

    present_left = notna(self.left)
    present_right = notna(self.right)
    same_missing = (present_left == present_right).all()
    if not same_missing:
        raise ValueError(
            "missing values must be missing in the same "
            "location both left and right sides"
        )

    # Only positions that are present take part in the ordering check.
    ordered = (self.left[present_left] <= self.right[present_left]).all()
    if not ordered:
        raise ValueError("left side of interval must be <= right side")
490 # ---------
491 # Interface
492 # ---------
def __iter__(self):
    """Iterate over the elements of the array's NumPy representation."""
    as_ndarray = np.asarray(self)
    return iter(as_ndarray)
def __len__(self) -> int:
    """Return the number of intervals, i.e. the length of the left bounds."""
    left_bounds = self.left
    return len(left_bounds)
def __getitem__(self, value):
    """
    Select by position.

    Returns a new array when the indexer selects an Index of bounds, the
    fill value when the selected left bound is a missing scalar, and an
    ``Interval`` otherwise.

    Raises
    ------
    ValueError
        If the indexer is multi-dimensional.
    """
    value = check_array_indexer(self, value)
    lhs = self.left[value]
    rhs = self.right[value]

    if isinstance(lhs, ABCIndexClass):
        return self._shallow_copy(lhs, rhs)

    # scalar
    if is_scalar(lhs) and isna(lhs):
        return self._fill_value
    if np.ndim(lhs) > 1:
        # GH#30588 multi-dimensional indexer disallowed
        raise ValueError("multi-dimensional indexing not allowed")
    return Interval(lhs, rhs, self.closed)
def __setitem__(self, key, value):
    """
    Set the intervals at `key` to `value`.

    Parameters
    ----------
    key : indexer
        Passed through ``check_array_indexer``.
    value : NA scalar, Interval, or list-like of intervals
        An NA scalar sets both bounds to missing; an integer subtype is
        cast to float first, and datetime/timedelta subtypes use the
        matching NaT.

    Raises
    ------
    TypeError
        If `value` is neither NA nor interval-like.

    Notes
    -----
    Both bounds are written to copies and swapped in only after both
    writes succeed, so a failure leaves the array unchanged.
    """
    # na value: need special casing to set directly on numpy arrays
    needs_float_conversion = False
    if is_scalar(value) and isna(value):
        if is_integer_dtype(self.dtype.subtype):
            # can't set NaN on a numpy integer array
            needs_float_conversion = True
        elif is_datetime64_any_dtype(self.dtype.subtype):
            # need proper NaT to set directly on the numpy array
            value = np.datetime64("NaT")
        elif is_timedelta64_dtype(self.dtype.subtype):
            # need proper NaT to set directly on the numpy array
            value = np.timedelta64("NaT")
        value_left, value_right = value, value

    # scalar interval
    elif is_interval_dtype(value) or isinstance(value, ABCInterval):
        self._check_closed_matches(value, name="value")
        value_left, value_right = value.left, value.right

    else:
        # list-like of intervals
        try:
            # named `intervals` so the module-level `array` is not shadowed
            intervals = IntervalArray(value)
            value_left, value_right = intervals.left, intervals.right
        except TypeError as err:
            # wrong type: not interval or NA
            msg = f"'value' should be an interval type, got {type(value)} instead."
            raise TypeError(msg) from err

    key = check_array_indexer(self, key)

    # Work on copies of both sides so nothing is visible on `self` until
    # both writes have succeeded.
    left = self.left.copy(deep=True)
    right = self.right.copy(deep=True)
    if needs_float_conversion:
        left = left.astype("float")
        right = right.astype("float")
    left.values[key] = value_left
    right.values[key] = value_right

    self._left = left
    self._right = right
def __eq__(self, other):
    """
    Element-wise equality, returned as a boolean array.

    Elements are equal only when both are intervals with the same
    ``closed`` and identical endpoints.  A non-interval scalar matches
    nothing.

    Raises
    ------
    ValueError
        If `other` is list-like with a different length.
    """
    size = len(self)

    # ensure pandas array for list-like and eliminate non-interval scalars
    if is_list_like(other):
        if size != len(other):
            raise ValueError("Lengths must match to compare")
        other = array(other)
    elif not isinstance(other, Interval):
        # non-interval scalar -> no matches
        return np.zeros(size, dtype=bool)

    # determine the dtype of the elements we want to compare
    if isinstance(other, Interval):
        other_dtype = "interval"
    elif is_categorical_dtype(other):
        # for categorical defer to categories for dtype
        other_dtype = other.categories.dtype

        # extract intervals if we have interval categories with matching closed
        if is_interval_dtype(other_dtype):
            if self.closed != other.categories.closed:
                return np.zeros(size, dtype=bool)
            other = other.categories.take(other.codes)
    else:
        other_dtype = other.dtype

    # interval-like -> need same closed and matching endpoints
    if is_interval_dtype(other_dtype):
        if self.closed != other.closed:
            return np.zeros(size, dtype=bool)
        left_equal = self.left == other.left
        right_equal = self.right == other.right
        return left_equal & right_equal

    # non-interval/non-object dtype -> no matches
    if not is_object_dtype(other_dtype):
        return np.zeros(size, dtype=bool)

    # object dtype -> iteratively check for intervals
    result = np.zeros(size, dtype=bool)
    for pos, candidate in enumerate(other):
        # need object to be an Interval with same closed and endpoints
        if not isinstance(candidate, Interval):
            continue
        if (
            self.closed == candidate.closed
            and self.left[pos] == candidate.left
            and self.right[pos] == candidate.right
        ):
            result[pos] = True

    return result
def __ne__(self, other):
    """Element-wise inequality: the bitwise inverse of ``__eq__``."""
    equal = self.__eq__(other)
    return ~equal
def fillna(self, value=None, method=None, limit=None):
    """
    Fill NA/NaN values with a single Interval.

    Parameters
    ----------
    value : Interval
        Scalar interval used to fill every missing entry.  Its ``closed``
        is checked against this array's via ``_check_closed_matches``.
    method : None
        Not supported for IntervalArray; must be left as None.
    limit : None
        Not supported for IntervalArray; must be left as None.

    Returns
    -------
    filled : IntervalArray with NA/NaN filled

    Raises
    ------
    TypeError
        If `method` or `limit` is given, or `value` is not an Interval.
    """
    if method is not None:
        raise TypeError("Filling by method is not supported for IntervalArray.")
    if limit is not None:
        raise TypeError("limit is not supported for IntervalArray.")

    if not isinstance(value, ABCInterval):
        msg = (
            "'IntervalArray.fillna' only supports filling with a "
            f"scalar 'pandas.Interval'. Got a '{type(value).__name__}' instead."
        )
        raise TypeError(msg)

    self._check_closed_matches(value, name="value")

    # Fill each side with the matching endpoint of the replacement.
    left = self.left.fillna(value=value.left)
    right = self.right.fillna(value=value.right)
    return self._shallow_copy(left, right)
@property
def dtype(self):
    """Interval dtype whose subtype is the dtype of the left bounds."""
    subtype = self.left.dtype
    return IntervalDtype(subtype)
def astype(self, dtype, copy=True):
    """
    Cast to an ExtensionArray or NumPy array with dtype 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.

    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    array : ExtensionArray or ndarray
        ExtensionArray or NumPy ndarray with 'dtype' for its dtype.

    Raises
    ------
    TypeError
        If the interval subtypes are incompatible or the array cannot be
        cast to `dtype`.
    """
    dtype = pandas_dtype(dtype)
    if is_interval_dtype(dtype):
        if dtype == self.dtype:
            return self.copy() if copy else self

        # need to cast to different subtype
        try:
            new_left = self.left.astype(dtype.subtype)
            new_right = self.right.astype(dtype.subtype)
        except TypeError as err:
            msg = (
                f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
            )
            raise TypeError(msg) from err
        return self._shallow_copy(new_left, new_right)
    elif is_categorical_dtype(dtype):
        # Forward the dtype so requested categories/ordering are honoured
        # rather than always being re-inferred from the data.
        return Categorical(np.asarray(self), dtype=dtype)
    # TODO: This try/except will be repeated.
    try:
        return np.asarray(self).astype(dtype, copy=copy)
    except (TypeError, ValueError) as err:
        msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
        raise TypeError(msg) from err
@classmethod
def _concat_same_type(cls, to_concat):
    """
    Concatenate multiple IntervalArray

    Parameters
    ----------
    to_concat : sequence of IntervalArray

    Returns
    -------
    IntervalArray

    Raises
    ------
    ValueError
        If the inputs do not all share one ``closed`` value.
    """
    closed_values = {piece.closed for piece in to_concat}
    if len(closed_values) != 1:
        raise ValueError("Intervals must all be closed on the same side.")
    (shared_closed,) = closed_values

    lefts = [piece.left for piece in to_concat]
    rights = [piece.right for piece in to_concat]
    return cls._simple_new(
        np.concatenate(lefts),
        np.concatenate(rights),
        closed=shared_closed,
        copy=False,
    )
def _shallow_copy(self, left=None, right=None, closed=None):
    """
    Return a new IntervalArray with the replacement attributes

    Parameters
    ----------
    left : array-like
        Values to be used for the left-side of the intervals.
        If None, the existing left and right values will be used.

    right : array-like
        Values to be used for the right-side of the intervals.
        If None and left is IntervalArray-like, the left and right
        of the IntervalArray-like will be used.

    closed : {'left', 'right', 'both', 'neither'}, optional
        Whether the intervals are closed on the left-side, right-side, both
        or neither. If None, the existing closed will be used.
    """
    if left is None:
        # no values passed
        new_left, new_right = self.left, self.right
    elif right is None:
        # only single value passed, could be an IntervalArray
        # or array of Intervals
        source = left
        if not isinstance(source, (type(self), ABCIntervalIndex)):
            source = type(self)(source)
        new_left, new_right = source.left, source.right
    else:
        # both left and right are values
        new_left, new_right = left, right

    # Integrity checks are skipped for the new array.
    return self._simple_new(
        new_left, new_right, closed=closed or self.closed, verify_integrity=False
    )
def copy(self):
    """
    Return a copy of the array.

    Returns
    -------
    IntervalArray
    """
    # TODO: Could skip verify_integrity here.
    return type(self).from_arrays(
        self.left.copy(deep=True),
        self.right.copy(deep=True),
        closed=self.closed,
    )
def isna(self):
    """Return the missing-value mask, computed from the left bounds only."""
    left_bounds = self.left
    return isna(left_bounds)
@property
def nbytes(self) -> int:
    """Combined ``nbytes`` of the left and right bounds."""
    total = self.left.nbytes
    total = total + self.right.nbytes
    return total
@property
def size(self) -> int:
    """Number of elements, read from the left bounds."""
    # Avoid materializing self.values
    left_bounds = self.left
    return left_bounds.size
def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray:
    """
    Shift the values by `periods` positions, padding the gap with
    `fill_value`.

    An empty array, or ``periods == 0``, returns a copy.  A missing
    `fill_value` is replaced by the dtype's NA value.
    """
    if periods == 0 or not len(self):
        return self.copy()

    if isna(fill_value):
        fill_value = self.dtype.na_value

    # ExtensionArray.shift doesn't work for two reasons
    # 1. IntervalArray.dtype.na_value may not be correct for the dtype.
    # 2. IntervalArray._from_sequence only accepts NaN for missing values,
    #    not other values like NaT

    pad_len = min(abs(periods), len(self))
    if isna(fill_value):
        fill_value = self.left._na_value
        # n + 1 breaks produce n intervals
        padding = IntervalArray.from_breaks([fill_value] * (pad_len + 1))
    else:
        padding = self._from_sequence([fill_value] * pad_len)

    if periods > 0:
        pieces = [padding, self[:-periods]]
    else:
        pieces = [self[abs(periods) :], padding]
    return self._concat_same_type(pieces)
def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
    """
    Take elements from the IntervalArray.

    Parameters
    ----------
    indices : sequence of integers
        Indices to be taken.

    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any
          other negative values raise a ``ValueError``.

    fill_value : Interval or NA, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    axis : any, default None
        Present for compat with IntervalIndex; does nothing.

    Returns
    -------
    IntervalArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True, or when `fill_value` is a non-scalar,
        non-NA object.
    """
    nv.validate_take(tuple(), kwargs)

    fill_left = fill_right = fill_value
    if allow_fill:
        if fill_value is None:
            fill_left = fill_right = self.left._na_value
        elif is_interval(fill_value):
            self._check_closed_matches(fill_value, name="fill_value")
            fill_left, fill_right = fill_value.left, fill_value.right
        elif not is_scalar(fill_value) and np.all(notna(fill_value)):
            # np.all reduces an element-wise notna result to one bool;
            # the bare array has an ambiguous truth value.
            msg = (
                "'IntervalArray.take' only supports filling with a "
                "'scalar pandas.Interval or NA'. "
                f"Got a '{type(fill_value).__name__}' instead."
            )
            raise ValueError(msg)

    left_take = take(
        self.left, indices, allow_fill=allow_fill, fill_value=fill_left
    )
    right_take = take(
        self.right, indices, allow_fill=allow_fill, fill_value=fill_right
    )

    return self._shallow_copy(left_take, right_take)
def value_counts(self, dropna=True):
    """
    Returns a Series containing counts of each interval.

    Parameters
    ----------
    dropna : bool, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series

    See Also
    --------
    Series.value_counts
    """
    # TODO: implement this in a non-naive way!
    as_objects = np.asarray(self)
    return value_counts(as_objects, dropna=dropna)
912 # Formatting
def _format_data(self):
    """
    Render the elements as a bracketed, comma-separated string.

    When the array is longer than a cap derived from the
    ``display.max_seq_items`` option, only head and tail elements are
    shown, separated by `` ... ``.
    """
    # TODO: integrate with categorical and make generic
    count = len(self)
    cap = min((get_option("display.max_seq_items") or count) // 10, 10)

    render = str

    if count == 0:
        return "[]"
    if count == 1:
        only = render(self[0])
        return f"[{only}]"
    if count == 2:
        first = render(self[0])
        last = render(self[-1])
        return f"[{first}, {last}]"

    if count > cap:
        shown = min(cap // 2, 10)
        head_str = ", ".join([render(item) for item in self[:shown]])
        tail_str = ", ".join([render(item) for item in self[-shown:]])
        return f"[{head_str} ... {tail_str}]"

    joined = ", ".join([render(item) for item in self])
    return f"[{joined}]"
def __repr__(self) -> str:
    """
    Three-line representation: ``<ClassName>``, the formatted data, then
    a footer with length, closed side and dtype.
    """
    header = f"<{type(self).__name__}>\n"
    body = self._format_data()
    footer = f"Length: {len(self)}, closed: {self.closed}, dtype: {self.dtype}"
    return f"{header}{body}\n{footer}"
def _format_space(self):
    """Return a newline followed by padding one wider than the class name."""
    width = len(type(self).__name__) + 1
    return "\n" + " " * width
@property
def left(self):
    """
    Return the left endpoints of each Interval in the IntervalArray as
    an Index.
    """
    left_bounds = self._left
    return left_bounds
@property
def right(self):
    """
    Return the right endpoints of each Interval in the IntervalArray as
    an Index.
    """
    right_bounds = self._right
    return right_bounds
@property
def closed(self):
    """
    Whether the intervals are closed on the left-side, right-side, both or
    neither.
    """
    closed_side = self._closed
    return closed_side
# Docstring template for ``set_closed``; placeholders: klass, examples.
_interval_shared_docs["set_closed"] = textwrap.dedent(
    """
    Return an %(klass)s identical to the current one, but closed on the
    specified side.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    closed : {'left', 'right', 'both', 'neither'}
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    new_index : %(klass)s

    %(examples)s\
    """
)

@Appender(
    _interval_shared_docs["set_closed"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
            Examples
            --------
            >>> index = pd.arrays.IntervalArray.from_breaks(range(4))
            >>> index
            <IntervalArray>
            [(0, 1], (1, 2], (2, 3]]
            Length: 3, closed: right, dtype: interval[int64]
            >>> index.set_closed('both')
            <IntervalArray>
            [[0, 1], [1, 2], [2, 3]]
            Length: 3, closed: both, dtype: interval[int64]
            """
        ),
    }
)
def set_closed(self, closed):
    # Accept only the known options; anything else raises ValueError.
    if closed in _VALID_CLOSED:
        return self._shallow_copy(closed=closed)

    msg = f"invalid option for 'closed': {closed}"
    raise ValueError(msg)
@property
def length(self):
    """
    Return an Index with entries denoting the length of each Interval in
    the IntervalArray.
    """
    try:
        span = self.right - self.left
    except TypeError:
        # length not defined for some types, e.g. string
        raise TypeError(
            "IntervalArray contains Intervals without defined length, "
            "e.g. Intervals with string endpoints"
        )
    return span
@property
def mid(self):
    """
    Return the midpoint of each Interval in the IntervalArray as an Index.
    """
    try:
        total = self.left + self.right
        return 0.5 * total
    except TypeError:
        # datetime safe version
        half_span = 0.5 * self.length
        return self.left + half_span
# Docstring template for ``is_non_overlapping_monotonic``; placeholder: klass.
_interval_shared_docs["is_non_overlapping_monotonic"] = """
        Return True if the %(klass)s is non-overlapping (no Intervals share
        points) and is either monotonic increasing or monotonic decreasing,
        else False.
        """

# https://github.com/python/mypy/issues/1362
# Mypy does not support decorated properties
@property  # type: ignore
@Appender(
    _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs
)
def is_non_overlapping_monotonic(self):
    # must be increasing  (e.g., [0, 1), [1, 2), [2, 3), ... )
    # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
    # we already require left <= right
    left, right = self.left, self.right

    if self.closed == "both":
        # strict inequality for closed == 'both'; equality implies overlapping
        # at a point when both sides of intervals are included
        if (right[:-1] < left[1:]).all():
            return True
        return bool((left[:-1] > right[1:]).all())

    # non-strict inequality when closed != 'both'; at least one side is
    # not included in the intervals, so equality does not imply overlapping
    if (right[:-1] <= left[1:]).all():
        return True
    return bool((left[:-1] >= right[1:]).all())
1099 # Conversion
def __array__(self, dtype=None) -> np.ndarray:
    """
    Return the IntervalArray's data as a numpy array of Interval
    objects (with dtype='object')

    Missing positions hold ``np.nan``.  The `dtype` argument is accepted
    but not used.
    """
    lefts = self.left
    rights = self.right
    missing = self.isna()
    closed = self._closed

    out = np.empty(len(lefts), dtype=object)
    for pos in range(len(lefts)):
        out[pos] = np.nan if missing[pos] else Interval(lefts[pos], rights[pos], closed)
    return out
def __arrow_array__(self, type=None):
    """
    Convert myself into a pyarrow Array.

    Parameters
    ----------
    type : pyarrow type, optional
        Requested result type.  ``None`` (the default) and an
        ``ArrowIntervalType`` equal to this array's own interval type both
        produce an extension array; the struct storage type of that
        interval type produces the bare storage array instead.

    Returns
    -------
    pyarrow array
        The result of ``pyarrow.ExtensionArray.from_storage``, or the
        underlying ``pyarrow.StructArray`` with ``left`` and ``right``
        fields when the storage type was requested.

    Raises
    ------
    TypeError
        If the interval subtype cannot be converted by
        ``pyarrow.from_numpy_dtype``, or if `type` is neither the storage
        type nor an ``ArrowIntervalType`` equal to this array's type.
    """
    import pyarrow
    from pandas.core.arrays._arrow_utils import ArrowIntervalType

    try:
        subtype = pyarrow.from_numpy_dtype(self.dtype.subtype)
    except TypeError as err:
        # chain explicitly so the pyarrow failure is reported as the cause
        raise TypeError(
            f"Conversion to arrow with subtype '{self.dtype.subtype}' "
            "is not supported"
        ) from err
    interval_type = ArrowIntervalType(subtype, self.closed)
    storage_array = pyarrow.StructArray.from_arrays(
        [
            pyarrow.array(self.left, type=subtype, from_pandas=True),
            pyarrow.array(self.right, type=subtype, from_pandas=True),
        ],
        names=["left", "right"],
    )
    mask = self.isna()
    if mask.any():
        # if there are missing values, set validity bitmap also on the array level
        null_bitmap = pyarrow.array(~mask).buffers()[1]
        storage_array = pyarrow.StructArray.from_buffers(
            storage_array.type,
            len(storage_array),
            [null_bitmap],
            children=[storage_array.field(0), storage_array.field(1)],
        )

    if type is not None:
        if type.equals(interval_type.storage_type):
            return storage_array
        elif isinstance(type, ArrowIntervalType):
            # ensure we have the same subtype and closed attributes
            if not type.equals(interval_type):
                raise TypeError(
                    "Not supported to convert IntervalArray to type with "
                    f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) "
                    f"and 'closed' ({self.closed} vs {type.closed}) "
                    "attributes"
                )
        else:
            raise TypeError(
                f"Not supported to convert IntervalArray to '{type}' type"
            )

    return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)
_interval_shared_docs["to_tuples"] = """
    Return an %(return_type)s of tuples of the form (left, right).

    Parameters
    ----------
    na_tuple : boolean, default True
        Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA
        value itself if False, ``nan``.

        .. versionadded:: 0.23.0

    Returns
    -------
    tuples: %(return_type)s
    %(examples)s\
    """

@Appender(
    _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""}
)
def to_tuples(self, na_tuple=True):
    pairs = com.asarray_tuplesafe(zip(self.left, self.right))
    if na_tuple:
        return pairs
    # GH 18756: missing entries become a bare NaN rather than a tuple
    return np.where(~self.isna(), pairs, np.nan)
@Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs)
def repeat(self, repeats, axis=None):
    # ``axis`` is only passed to the numpy-compat validator
    nv.validate_repeat((), {"axis": axis})
    return self._shallow_copy(
        left=self.left.repeat(repeats), right=self.right.repeat(repeats)
    )
_interval_shared_docs["contains"] = textwrap.dedent(
    """
    Check elementwise if the Intervals contain the value.

    Return a boolean mask whether the value is contained in the Intervals
    of the %(klass)s.

    .. versionadded:: 0.25.0

    Parameters
    ----------
    other : scalar
        The value to check whether it is contained in the Intervals.

    Returns
    -------
    boolean array

    See Also
    --------
    Interval.contains : Check whether Interval object contains value.
    %(klass)s.overlaps : Check if an Interval overlaps the values in the
        %(klass)s.

    Examples
    --------
    %(examples)s
    >>> intervals.contains(0.5)
    array([ True, False, False])
    """
)

@Appender(
    _interval_shared_docs["contains"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
            >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)])
            >>> intervals
            <IntervalArray>
            [(0, 1], (1, 3], (2, 4]]
            Length: 3, closed: right, dtype: interval[int64]
            """
        ),
    }
)
def contains(self, other):
    if isinstance(other, Interval):
        raise NotImplementedError("contains not implemented for two intervals")

    # an open side excludes its endpoint, so it needs the strict comparison
    if self.open_left:
        past_left = self.left < other
    else:
        past_left = self.left <= other

    if self.open_right:
        before_right = other < self.right
    else:
        before_right = other <= self.right

    return past_left & before_right
_interval_shared_docs["overlaps"] = textwrap.dedent(
    """
    Check elementwise if an Interval overlaps the values in the %(klass)s.

    Two intervals overlap if they share a common point, including closed
    endpoints. Intervals that only have an open endpoint in common do not
    overlap.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    other : %(klass)s
        Interval to check against for an overlap.

    Returns
    -------
    ndarray
        Boolean array positionally indicating where an overlap occurs.

    See Also
    --------
    Interval.overlaps : Check whether two Interval objects overlap.

    Examples
    --------
    %(examples)s
    >>> intervals.overlaps(pd.Interval(0.5, 1.5))
    array([ True,  True, False])

    Intervals that share closed endpoints overlap:

    >>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
    array([ True,  True,  True])

    Intervals that only have an open endpoint in common do not overlap:

    >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
    array([False,  True, False])
    """
)

@Appender(
    _interval_shared_docs["overlaps"]
    % {
        "klass": "IntervalArray",
        "examples": textwrap.dedent(
            """\
            >>> data = [(0, 1), (1, 3), (2, 4)]
            >>> intervals = pd.arrays.IntervalArray.from_tuples(data)
            >>> intervals
            <IntervalArray>
            [(0, 1], (1, 3], (2, 4]]
            Length: 3, closed: right, dtype: interval[int64]
            """
        ),
    }
)
def overlaps(self, other):
    if isinstance(other, (IntervalArray, ABCIntervalIndex)):
        raise NotImplementedError
    if not isinstance(other, Interval):
        raise TypeError(
            f"`other` must be Interval-like, got {type(other).__name__}"
        )

    # A shared endpoint only counts as an overlap when both of the sides
    # that meet there are closed; otherwise the comparison must be strict.
    start_cmp = le if (self.closed_left and other.closed_right) else lt
    end_cmp = le if (other.closed_left and self.closed_right) else lt

    # Overlapping is the negation of being disjoint, where
    #   disjoint = (A.left > B.right) or (B.left > A.right)
    # Writing out the negated form directly needs fewer operations.
    begins_in_time = start_cmp(self.left, other.right)
    ends_late_enough = end_cmp(other.left, self.right)
    return begins_in_time & ends_late_enough
def maybe_convert_platform_interval(values):
    """
    Try to do platform conversion, with special casing for IntervalArray.

    Thin wrapper around maybe_convert_platform that adjusts the returned
    dtype in the cases where the default would be incompatible with
    IntervalArray.  For example, empty lists come back with integer dtype
    instead of object dtype, which is prohibited for IntervalArray.

    Parameters
    ----------
    values : array-like

    Returns
    -------
    array
    """
    if isinstance(values, (list, tuple)) and not values:
        # GH 19016
        # an empty list/tuple would default to object dtype, which
        # IntervalArray does not allow, so return an empty int64 array
        return np.array([], dtype=np.int64)

    if is_categorical_dtype(values):
        values = np.asarray(values)

    return maybe_convert_platform(values)