Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/dtypes/dtypes.py : 40%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1""" define extension dtypes """
import re
from typing import Any, Dict, List, MutableMapping, Optional, Tuple, Type, Union, cast

import numpy as np
import pytz

from pandas._libs.interval import Interval
from pandas._libs.tslibs import NaT, Period, Timestamp, timezones
from pandas._typing import Ordered

from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCDateOffset, ABCIndexClass
from pandas.core.dtypes.inference import is_bool, is_list_like
# Alias for the builtin ``str``. The dtype classes below define a class
# attribute that is itself named ``str``, so annotations written inside those
# class bodies use ``str_type`` to refer to the builtin unambiguously.
str_type = str
def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]:
    """
    Register an ExtensionType with pandas as class decorator.

    .. versionadded:: 0.24.0

    This enables operations like ``.astype(name)`` for the name
    of the ExtensionDtype.

    Parameters
    ----------
    cls : Type[ExtensionDtype]
        The dtype class to add to the module-level ``registry``.

    Returns
    -------
    Type[ExtensionDtype]
        ``cls`` itself, unchanged, so that this function can be applied as a
        class decorator.

    Examples
    --------
    >>> from pandas.api.extensions import register_extension_dtype
    >>> from pandas.api.extensions import ExtensionDtype
    >>> @register_extension_dtype
    ... class MyExtensionDtype(ExtensionDtype):
    ...     pass
    """
    registry.register(cls)
    return cls
class Registry:
    """
    Registry for dtype inference.

    The registry allows one to map a string repr of an extension
    dtype to an extension dtype. The string alias can be used in several
    places, including

    * Series and Index constructors
    * :meth:`pandas.array`
    * :meth:`pandas.Series.astype`

    Multiple extension types can be registered.
    These are tried in order.
    """

    def __init__(self):
        # Registered dtype classes, in registration order.
        self.dtypes: List[Type[ExtensionDtype]] = []

    def register(self, dtype: Type[ExtensionDtype]) -> None:
        """
        Append a dtype class to the registry.

        Parameters
        ----------
        dtype : ExtensionDtype

        Raises
        ------
        ValueError
            If ``dtype`` is not a subclass of ExtensionDtype.
        """
        if not issubclass(dtype, ExtensionDtype):
            raise ValueError("can only register pandas extension dtypes")

        self.dtypes.append(dtype)

    def find(
        self, dtype: Union[Type[ExtensionDtype], str]
    ) -> Optional[Type[ExtensionDtype]]:
        """
        Look up an extension dtype.

        Parameters
        ----------
        dtype : Type[ExtensionDtype] or str

        Returns
        -------
        return the first matching dtype, otherwise return None

        Notes
        -----
        A non-string ``dtype`` is returned as-is when it is an ExtensionDtype
        class or instance, and None otherwise. A string is passed to
        ``construct_from_string`` of each registered class in turn, and the
        first result that does not raise TypeError is returned.
        """
        if not isinstance(dtype, str):
            # Accept either a dtype class or a dtype instance.
            dtype_type = dtype if isinstance(dtype, type) else type(dtype)
            if issubclass(dtype_type, ExtensionDtype):
                return dtype

            return None

        for dtype_type in self.dtypes:
            try:
                return dtype_type.construct_from_string(dtype)
            except TypeError:
                # This class does not recognise the alias; try the next one.
                continue

        return None


# Module-level registry instance that ``register_extension_dtype`` adds to.
registry = Registry()
class PandasExtensionDtype(ExtensionDtype):
    """
    A np.dtype duck-typed class, suitable for holding a custom dtype.

    THIS IS NOT A REAL NUMPY DTYPE
    """

    type: Any
    kind: Any
    # The Any type annotations above are here only because mypy seems to have a
    # problem dealing with multiple inheritance from PandasExtensionDtype
    # and ExtensionDtype's @properties in the subclasses below.  The kind and
    # type variables in those subclasses are explicitly typed below.
    subdtype = None
    str: Optional[str_type] = None
    num = 100
    shape: Tuple[int, ...] = tuple()
    itemsize = 8
    base = None
    isbuiltin = 0
    isnative = 0
    _cache: Dict[str_type, "PandasExtensionDtype"] = {}

    def __str__(self) -> str_type:
        """
        Return a string representation for a particular Object
        """
        return self.name

    def __repr__(self) -> str_type:
        """
        Return a string representation for a particular object.
        """
        return str(self)

    def __hash__(self) -> int:
        raise NotImplementedError("sub-classes should implement an __hash__ method")

    def __getstate__(self) -> Dict[str_type, Any]:
        """
        Return the pickle state: one entry per name in ``_metadata``.

        Attributes that are missing on the instance are stored as None.
        """
        # pickle support; we don't want to pickle the cache
        return {k: getattr(self, k, None) for k in self._metadata}

    @classmethod
    def reset_cache(cls) -> None:
        """ clear the cache """
        # Rebinds ``_cache`` on ``cls`` to a fresh dict.
        cls._cache = {}
class CategoricalDtypeType(type):
    """
    the type of CategoricalDtype, this metaclass determines subclass ability
    """
@register_extension_dtype
class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
    """
    Type for categorical data with the categories and orderedness.

    .. versionchanged:: 0.21.0

    Parameters
    ----------
    categories : sequence, optional
        Must be unique, and must not contain any nulls.
    ordered : bool or None, default False
        Whether or not this categorical is treated as an ordered categorical.
        None can be used to maintain the ordered value of existing categoricals when
        used in operations that combine categoricals, e.g. astype, and will resolve to
        False if there is no existing ordered to maintain.

    Attributes
    ----------
    categories
    ordered

    Methods
    -------
    None

    See Also
    --------
    Categorical

    Notes
    -----
    This class is useful for specifying the type of a ``Categorical``
    independent of the values. See :ref:`categorical.categoricaldtype`
    for more.

    Examples
    --------
    >>> t = pd.CategoricalDtype(categories=['b', 'a'], ordered=True)
    >>> pd.Series(['a', 'b', 'a', 'c'], dtype=t)
    0      a
    1      b
    2      a
    3    NaN
    dtype: category
    Categories (2, object): [b < a]
    """

    # TODO: Document public vs. private API
    name = "category"
    type: Type[CategoricalDtypeType] = CategoricalDtypeType
    kind: str_type = "O"
    str = "|O08"
    base = np.dtype("O")
    _metadata = ("categories", "ordered")
    _cache: Dict[str_type, PandasExtensionDtype] = {}

    def __init__(self, categories=None, ordered: Ordered = False):
        self._finalize(categories, ordered, fastpath=False)

    @classmethod
    def _from_fastpath(
        cls, categories=None, ordered: Optional[bool] = None
    ) -> "CategoricalDtype":
        """
        Build an instance without running ``__init__``, finalizing with
        ``fastpath=True``.
        """
        self = cls.__new__(cls)
        self._finalize(categories, ordered, fastpath=True)
        return self

    @classmethod
    def _from_categorical_dtype(
        cls, dtype: "CategoricalDtype", categories=None, ordered: Ordered = None
    ) -> "CategoricalDtype":
        """
        Return ``dtype``, overridden by ``categories`` / ``ordered`` where
        those are not None.

        When both are None the very same ``dtype`` object is returned.
        """
        if categories is ordered is None:
            return dtype
        if categories is None:
            categories = dtype.categories
        if ordered is None:
            ordered = dtype.ordered
        return cls(categories, ordered)

    @classmethod
    def _from_values_or_dtype(
        cls,
        values=None,
        categories=None,
        ordered: Optional[bool] = None,
        dtype: Optional["CategoricalDtype"] = None,
    ) -> "CategoricalDtype":
        """
        Construct dtype from the input parameters used in :class:`Categorical`.

        This constructor method specifically does not do the factorization
        step, if that is needed to find the categories. This constructor may
        therefore return ``CategoricalDtype(categories=None, ordered=None)``,
        which may not be useful. Additional steps may therefore have to be
        taken to create the final dtype.

        The return dtype is specified from the inputs in this prioritized
        order:
        1. if dtype is a CategoricalDtype, return dtype
        2. if dtype is the string 'category', create a CategoricalDtype from
           the supplied categories and ordered parameters, and return that.
        3. if values is a categorical, use value.dtype, but override it with
           categories and ordered if either/both of those are not None.
        4. if dtype is None and values is not a categorical, construct the
           dtype from categories and ordered, even if either of those is None.

        Parameters
        ----------
        values : list-like, optional
            The list-like must be 1-dimensional.
        categories : list-like, optional
            Categories for the CategoricalDtype.
        ordered : bool, optional
            Designating if the categories are ordered.
        dtype : CategoricalDtype or the string "category", optional
            If ``CategoricalDtype``, cannot be used together with
            `categories` or `ordered`.

        Returns
        -------
        CategoricalDtype

        Raises
        ------
        ValueError
            If ``dtype`` is a string other than "category", or if ``dtype`` is
            not a string and ``categories`` or ``ordered`` is also given.

        Examples
        --------
        >>> CategoricalDtype._from_values_or_dtype()
        CategoricalDtype(categories=None, ordered=None)
        >>> CategoricalDtype._from_values_or_dtype(categories=['a', 'b'],
        ...                                        ordered=True)
        CategoricalDtype(categories=['a', 'b'], ordered=True)
        >>> dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
        >>> dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
        >>> c = Categorical([0, 1], dtype=dtype1, fastpath=True)
        >>> CategoricalDtype._from_values_or_dtype(c, ['x', 'y'], ordered=True,
        ...                                        dtype=dtype2)
        ValueError: Cannot specify `categories` or `ordered` together with
        `dtype`.

        The supplied dtype takes precedence over values' dtype:

        >>> CategoricalDtype._from_values_or_dtype(c, dtype=dtype2)
        CategoricalDtype(['x', 'y'], ordered=False)
        """
        from pandas.core.dtypes.common import is_categorical

        if dtype is not None:
            # The dtype argument takes precedence over values.dtype (if any)
            if isinstance(dtype, str):
                if dtype == "category":
                    dtype = CategoricalDtype(categories, ordered)
                else:
                    raise ValueError(f"Unknown dtype {repr(dtype)}")
            elif categories is not None or ordered is not None:
                raise ValueError(
                    "Cannot specify `categories` or `ordered` together with `dtype`."
                )
        elif is_categorical(values):
            # If no "dtype" was passed, use the one from "values", but honor
            # the "ordered" and "categories" arguments
            dtype = values.dtype._from_categorical_dtype(
                values.dtype, categories, ordered
            )
        else:
            # If dtype=None and values is not categorical, create a new dtype.
            # Note: This could potentially have categories=None and
            # ordered=None.
            dtype = CategoricalDtype(categories, ordered)

        return dtype

    @classmethod
    def construct_from_string(cls, string: str_type) -> "CategoricalDtype":
        """
        Construct a CategoricalDtype from a string.

        Parameters
        ----------
        string : str
            Must be the string "category" in order to be successfully constructed.

        Returns
        -------
        CategoricalDtype
            Instance of the dtype.

        Raises
        ------
        TypeError
            If a CategoricalDtype cannot be constructed from the input.
        """
        if not isinstance(string, str):
            raise TypeError(f"Expects a string, got {type(string)}")
        if string != cls.name:
            raise TypeError(f"Cannot construct a 'CategoricalDtype' from '{string}'")

        # need ordered=None to ensure that operations specifying dtype="category" don't
        # override the ordered value for existing categoricals
        return cls(ordered=None)

    def _finalize(self, categories, ordered: Ordered, fastpath: bool = False) -> None:
        """
        Validate ``ordered`` and ``categories`` (each only when not None) and
        store them on the instance.
        """
        if ordered is not None:
            self.validate_ordered(ordered)

        if categories is not None:
            categories = self.validate_categories(categories, fastpath=fastpath)

        self._categories = categories
        self._ordered = ordered

    def __setstate__(self, state: MutableMapping[str_type, Any]) -> None:
        # for pickle compat. __getstate__ is defined in the
        # PandasExtensionDtype superclass and uses the public properties to
        # pickle -> need to set the settable private ones here (see GH26067)
        self._categories = state.pop("categories", None)
        self._ordered = state.pop("ordered", False)

    def __hash__(self) -> int:
        # _hash_categories returns a uint64, so use the negative
        # space for when we have unknown categories to avoid a conflict
        if self.categories is None:
            if self.ordered:
                return -1
            else:
                return -2
        # We *do* want to include the real self.ordered here
        return int(self._hash_categories(self.categories, self.ordered))

    def __eq__(self, other: Any) -> bool:
        """
        Rules for CDT equality:
        1) Any CDT is equal to the string 'category'
        2) Any CDT is equal to itself
        3) Any CDT is equal to a CDT with categories=None regardless of ordered
        4) A CDT with ordered=True is only equal to another CDT with
           ordered=True and identical categories in the same order
        5) A CDT with ordered={False, None} is only equal to another CDT with
           ordered={False, None} and identical categories, but same order is
           not required. There is no distinction between False/None.
        6) Any other comparison returns False
        """
        if isinstance(other, str):
            return other == self.name
        elif other is self:
            return True
        elif not (hasattr(other, "ordered") and hasattr(other, "categories")):
            return False
        elif self.categories is None or other.categories is None:
            # We're forced into a suboptimal corner thanks to math and
            # backwards compatibility. We require that `CDT(...) == 'category'`
            # for all CDTs **including** `CDT(None, ...)`. Therefore, *all*
            # CDT(., .) = CDT(None, False) and *all*
            # CDT(., .) = CDT(None, True).
            return True
        elif self.ordered or other.ordered:
            # At least one has ordered=True; equal if both have ordered=True
            # and the same values for categories in the same order.
            return (self.ordered == other.ordered) and self.categories.equals(
                other.categories
            )
        else:
            # Neither has ordered=True; equal if both have the same categories,
            # but same order is not necessary.  There is no distinction between
            # ordered=False and ordered=None: CDT(., False) and CDT(., None)
            # will be equal if they have the same categories.
            if (
                self.categories.dtype == other.categories.dtype
                and self.categories.equals(other.categories)
            ):
                # Check and see if they happen to be identical categories
                return True
            return hash(self) == hash(other)

    def __repr__(self) -> str_type:
        # ``data`` carries its own trailing ", " in the None case, which is why
        # the template has no separator before "ordered=".
        tpl = "CategoricalDtype(categories={data}ordered={ordered})"
        if self.categories is None:
            data = "None, "
        else:
            data = self.categories._format_data(name=type(self).__name__)
        return tpl.format(data=data, ordered=self.ordered)

    @staticmethod
    def _hash_categories(categories, ordered: Ordered = True) -> int:
        """
        Combine the hashes of the individual categories into a single value.

        When ``ordered`` is truthy each category's position is combined into
        the hash as well.
        """
        from pandas.core.util.hashing import (
            hash_array,
            _combine_hash_arrays,
            hash_tuples,
        )
        from pandas.core.dtypes.common import is_datetime64tz_dtype, _NS_DTYPE

        if len(categories) and isinstance(categories[0], tuple):
            # assumes if any individual category is a tuple, then all are. ATM
            # I don't really want to support just some of the categories being
            # tuples.
            categories = list(categories)  # breaks if a np.array of categories
            cat_array = hash_tuples(categories)
        else:
            if categories.dtype == "O":
                if len({type(x) for x in categories}) != 1:
                    # TODO: hash_array doesn't handle mixed types. It casts
                    # everything to a str first, which means we treat
                    # {'1', '2'} the same as {'1', 2}
                    # find a better solution
                    hashed = hash((tuple(categories), ordered))
                    return hashed

            if is_datetime64tz_dtype(categories.dtype):
                # Avoid future warning.
                categories = categories.astype(_NS_DTYPE)

            cat_array = hash_array(np.asarray(categories), categorize=False)
        if ordered:
            # Stack a positional row under the hashes so order is included.
            cat_array = np.vstack(
                [cat_array, np.arange(len(cat_array), dtype=cat_array.dtype)]
            )
        else:
            cat_array = [cat_array]
        hashed = _combine_hash_arrays(iter(cat_array), num_items=len(cat_array))
        return np.bitwise_xor.reduce(hashed)

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        from pandas import Categorical

        return Categorical

    @staticmethod
    def validate_ordered(ordered: Ordered) -> None:
        """
        Validates that we have a valid ordered parameter. If
        it is not a boolean, a TypeError will be raised.

        Parameters
        ----------
        ordered : object
            The parameter to be verified.

        Raises
        ------
        TypeError
            If 'ordered' is not a boolean.
        """
        if not is_bool(ordered):
            raise TypeError("'ordered' must either be 'True' or 'False'")

    @staticmethod
    def validate_categories(categories, fastpath: bool = False):
        """
        Validates that we have good categories

        Parameters
        ----------
        categories : array-like
        fastpath : bool
            Whether to skip nan and uniqueness checks

        Returns
        -------
        categories : Index

        Raises
        ------
        TypeError
            If ``fastpath`` is False and ``categories`` is not list-like.
        ValueError
            If ``fastpath`` is False and ``categories`` contains nulls or
            duplicates.
        """
        from pandas.core.indexes.base import Index

        if not fastpath and not is_list_like(categories):
            raise TypeError(
                f"Parameter 'categories' must be list-like, was {repr(categories)}"
            )
        elif not isinstance(categories, ABCIndexClass):
            categories = Index(categories, tupleize_cols=False)

        if not fastpath:

            if categories.hasnans:
                raise ValueError("Categorical categories cannot be null")

            if not categories.is_unique:
                raise ValueError("Categorical categories must be unique")

        if isinstance(categories, ABCCategoricalIndex):
            # Unwrap to the underlying categories of the CategoricalIndex.
            categories = categories.categories

        return categories

    def update_dtype(
        self, dtype: Union[str_type, "CategoricalDtype"]
    ) -> "CategoricalDtype":
        """
        Returns a CategoricalDtype with categories and ordered taken from dtype
        if specified, otherwise falling back to self if unspecified

        Parameters
        ----------
        dtype : CategoricalDtype

        Returns
        -------
        new_dtype : CategoricalDtype

        Raises
        ------
        ValueError
            If ``dtype`` is not the string "category" and ``is_dtype(dtype)``
            is False.
        """
        if isinstance(dtype, str) and dtype == "category":
            # dtype='category' should not change anything
            return self
        elif not self.is_dtype(dtype):
            raise ValueError(
                f"a CategoricalDtype must be passed to perform an update, "
                f"got {repr(dtype)}"
            )
        else:
            # from here on, dtype is a CategoricalDtype
            dtype = cast(CategoricalDtype, dtype)

        # update categories/ordered unless they've been explicitly passed as None
        new_categories = (
            dtype.categories if dtype.categories is not None else self.categories
        )
        new_ordered = dtype.ordered if dtype.ordered is not None else self.ordered

        return CategoricalDtype(new_categories, new_ordered)

    @property
    def categories(self):
        """
        An ``Index`` containing the unique categories allowed.
        """
        return self._categories

    @property
    def ordered(self) -> Ordered:
        """
        Whether the categories have an ordered relationship.
        """
        return self._ordered

    @property
    def _is_boolean(self) -> bool:
        from pandas.core.dtypes.common import is_bool_dtype

        return is_bool_dtype(self.categories)
@register_extension_dtype
class DatetimeTZDtype(PandasExtensionDtype):
    """
    An ExtensionDtype for timezone-aware datetime data.

    **This is not an actual numpy dtype**, but a duck type.

    Parameters
    ----------
    unit : str, default "ns"
        The precision of the datetime data. Currently limited
        to ``"ns"``.
    tz : str, int, or datetime.tzinfo
        The timezone.

    Attributes
    ----------
    unit
    tz

    Methods
    -------
    None

    Raises
    ------
    pytz.UnknownTimeZoneError
        When the requested timezone cannot be found.

    Examples
    --------
    >>> pd.DatetimeTZDtype(tz='UTC')
    datetime64[ns, UTC]

    >>> pd.DatetimeTZDtype(tz='dateutil/US/Central')
    datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
    """

    type: Type[Timestamp] = Timestamp
    kind: str_type = "M"
    str = "|M8[ns]"
    num = 101
    base = np.dtype("M8[ns]")
    na_value = NaT
    _metadata = ("unit", "tz")
    _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
    _cache: Dict[str_type, PandasExtensionDtype] = {}

    def __init__(self, unit="ns", tz=None):
        if isinstance(unit, DatetimeTZDtype):
            # Copy construction: take both fields from the existing dtype.
            unit, tz = unit.unit, unit.tz

        if unit != "ns":
            if isinstance(unit, str) and tz is None:
                # maybe a string like datetime64[ns, tz]. The alias is parsed
                # only so the error message can name the timezone; parsing
                # itself raises TypeError when the string does not match.
                result = type(self).construct_from_string(unit)
                tz = result.tz
                msg = (
                    f"Passing a dtype alias like 'datetime64[ns, {tz}]' "
                    "to DatetimeTZDtype is no longer supported. Use "
                    "'DatetimeTZDtype.construct_from_string()' instead."
                )
                raise ValueError(msg)
            else:
                raise ValueError("DatetimeTZDtype only supports ns units")

        if tz:
            tz = timezones.maybe_get_tz(tz)
            tz = timezones.tz_standardize(tz)
        elif tz is not None:
            # Falsy but not None (e.g. an empty string) is rejected here.
            raise pytz.UnknownTimeZoneError(tz)
        if tz is None:
            raise TypeError("A 'tz' is required.")

        self._unit = unit
        self._tz = tz

    @property
    def unit(self):
        """
        The precision of the datetime data.
        """
        return self._unit

    @property
    def tz(self):
        """
        The timezone.
        """
        return self._tz

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        from pandas.core.arrays import DatetimeArray

        return DatetimeArray

    @classmethod
    def construct_from_string(cls, string: str_type):
        """
        Construct a DatetimeTZDtype from a string.

        Parameters
        ----------
        string : str
            The string alias for this DatetimeTZDtype.
            Should be formatted like ``datetime64[ns, <tz>]``,
            where ``<tz>`` is the timezone name.

        Raises
        ------
        TypeError
            If ``string`` is not a str, does not match the expected pattern,
            or the parsed unit / tz is rejected by the constructor.

        Examples
        --------
        >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]')
        datetime64[ns, UTC]
        """
        if isinstance(string, str):
            msg = f"Cannot construct a 'DatetimeTZDtype' from '{string}'"
            match = cls._match.match(string)
            if match:
                d = match.groupdict()
                try:
                    return cls(unit=d["unit"], tz=d["tz"])
                except (KeyError, TypeError, ValueError) as err:
                    # KeyError if maybe_get_tz tries and fails to get a
                    #  pytz timezone (actually pytz.UnknownTimeZoneError).
                    # TypeError if we pass a nonsense tz;
                    # ValueError if we pass a unit other than "ns"
                    raise TypeError(msg) from err
            raise TypeError(msg)

        raise TypeError("Cannot construct a 'DatetimeTZDtype'")

    def __str__(self) -> str_type:
        return f"datetime64[{self.unit}, {self.tz}]"

    @property
    def name(self) -> str_type:
        """A string representation of the dtype."""
        return str(self)

    def __hash__(self) -> int:
        # make myself hashable
        # TODO: update this.
        return hash(str(self))

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, str):
            return other == self.name

        # Timezones are compared through their string form.
        return (
            isinstance(other, DatetimeTZDtype)
            and self.unit == other.unit
            and str(self.tz) == str(other.tz)
        )

    def __setstate__(self, state):
        # for pickle compat. __getstate__ is defined in the
        # PandasExtensionDtype superclass and uses the public properties to
        # pickle -> need to set the settable private ones here (see GH26067)
        self._tz = state["tz"]
        self._unit = state["unit"]
@register_extension_dtype
class PeriodDtype(PandasExtensionDtype):
    """
    An ExtensionDtype for Period data.

    **This is not an actual numpy dtype**, but a duck type.

    Parameters
    ----------
    freq : str or DateOffset
        The frequency of this PeriodDtype.

    Attributes
    ----------
    freq

    Methods
    -------
    None

    Examples
    --------
    >>> pd.PeriodDtype(freq='D')
    period[D]

    >>> pd.PeriodDtype(freq=pd.offsets.MonthEnd())
    period[M]
    """

    type: Type[Period] = Period
    kind: str_type = "O"
    str = "|O08"
    base = np.dtype("O")
    num = 102
    _metadata = ("freq",)
    _match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
    _cache: Dict[str_type, PandasExtensionDtype] = {}

    def __new__(cls, freq=None):
        """
        Parameters
        ----------
        freq : frequency
        """

        if isinstance(freq, PeriodDtype):
            return freq

        elif freq is None:
            # empty constructor for pickle compat
            u = object.__new__(cls)
            u._freq = None
            return u

        if not isinstance(freq, ABCDateOffset):
            freq = cls._parse_dtype_strict(freq)

        # Instances are cached per ``freqstr``, so constructing the same
        # frequency twice returns the same object.
        try:
            return cls._cache[freq.freqstr]
        except KeyError:
            u = object.__new__(cls)
            u._freq = freq
            cls._cache[freq.freqstr] = u
            return u

    @property
    def freq(self):
        """
        The frequency object of this PeriodDtype.
        """
        return self._freq

    @classmethod
    def _parse_dtype_strict(cls, freq):
        """
        Convert a frequency string, optionally wrapped as ``period[...]`` or
        ``Period[...]``, into an offset via ``to_offset``.

        Raises
        ------
        ValueError
            If ``freq`` is not a string or ``to_offset`` returns None.
        """
        if isinstance(freq, str):
            if freq.startswith(("period[", "Period[")):
                m = cls._match.search(freq)
                if m is not None:
                    freq = m.group("freq")
            from pandas.tseries.frequencies import to_offset

            freq = to_offset(freq)
            if freq is not None:
                return freq

        raise ValueError("could not construct PeriodDtype")

    @classmethod
    def construct_from_string(cls, string):
        """
        Strict construction from a string, raise a TypeError if not
        possible
        """
        if (
            isinstance(string, str) and string.startswith(("period[", "Period["))
        ) or isinstance(string, ABCDateOffset):
            # do not parse string like U as period[U]
            # avoid tuple to be regarded as freq
            try:
                return cls(freq=string)
            except ValueError:
                # Fall through to the TypeError raised below.
                pass
        if isinstance(string, str):
            msg = f"Cannot construct a 'PeriodDtype' from '{string}'"
        else:
            msg = f"'construct_from_string' expects a string, got {type(string)}"
        raise TypeError(msg)

    def __str__(self) -> str_type:
        return self.name

    @property
    def name(self) -> str_type:
        return f"period[{self.freq.freqstr}]"

    @property
    def na_value(self):
        return NaT

    def __hash__(self) -> int:
        # make myself hashable
        return hash(str(self))

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, str):
            # Both "period[...]" and its title-cased form are accepted.
            return other == self.name or other == self.name.title()

        return isinstance(other, PeriodDtype) and self.freq == other.freq

    def __setstate__(self, state):
        # for pickle compat. __getstate__ is defined in the
        # PandasExtensionDtype superclass and uses the public properties to
        # pickle -> need to set the settable private ones here (see GH26067)
        self._freq = state["freq"]

    @classmethod
    def is_dtype(cls, dtype) -> bool:
        """
        Return a boolean if we if the passed type is an actual dtype that we
        can match (via string or type)
        """

        if isinstance(dtype, str):
            # PeriodDtype can be instantiated from freq string like "U",
            # but doesn't regard freq str like "U" as dtype.
            if dtype.startswith(("period[", "Period[")):
                try:
                    return cls._parse_dtype_strict(dtype) is not None
                except ValueError:
                    return False
            else:
                return False
        return super().is_dtype(dtype)

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        from pandas.core.arrays import PeriodArray

        return PeriodArray

    def __from_arrow__(self, array):
        """Construct PeriodArray from pyarrow Array/ChunkedArray."""
        import pyarrow
        from pandas.core.arrays import PeriodArray
        from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

        # Treat a plain Array as a single chunk so both inputs share one path.
        if isinstance(array, pyarrow.Array):
            chunks = [array]
        else:
            chunks = array.chunks

        results = []
        for arr in chunks:
            data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype="int64")
            parr = PeriodArray(data.copy(), freq=self.freq, copy=False)
            # Positions where ``mask`` is False are set to NaT.
            parr[~mask] = NaT
            results.append(parr)

        return PeriodArray._concat_same_type(results)
@register_extension_dtype
class IntervalDtype(PandasExtensionDtype):
    """
    An ExtensionDtype for Interval data.

    **This is not an actual numpy dtype**, but a duck type.

    Parameters
    ----------
    subtype : str, np.dtype
        The dtype of the Interval bounds.

    Attributes
    ----------
    subtype

    Methods
    -------
    None

    Examples
    --------
    >>> pd.IntervalDtype(subtype='int64')
    interval[int64]
    """

    name = "interval"
    kind: str_type = "O"
    str = "|O08"
    base = np.dtype("O")
    num = 103
    _metadata = ("subtype",)
    _match = re.compile(r"(I|i)nterval\[(?P<subtype>.+)\]")
    _cache: Dict[str_type, PandasExtensionDtype] = {}

    def __new__(cls, subtype=None):
        from pandas.core.dtypes.common import (
            is_categorical_dtype,
            is_string_dtype,
            pandas_dtype,
        )

        if isinstance(subtype, IntervalDtype):
            return subtype
        elif subtype is None:
            # we are called as an empty constructor
            # generally for pickle compat
            u = object.__new__(cls)
            u._subtype = None
            return u
        elif isinstance(subtype, str) and subtype.lower() == "interval":
            subtype = None
        else:
            if isinstance(subtype, str):
                # Accept "interval[<subtype>]" and keep only the inner part.
                m = cls._match.search(subtype)
                if m is not None:
                    subtype = m.group("subtype")

            try:
                subtype = pandas_dtype(subtype)
            except TypeError as err:
                raise TypeError("could not construct IntervalDtype") from err

        if is_categorical_dtype(subtype) or is_string_dtype(subtype):
            # GH 19016
            msg = (
                "category, object, and string subtypes are not supported "
                "for IntervalDtype"
            )
            raise TypeError(msg)

        # Instances are cached per ``str(subtype)``, so constructing the same
        # subtype twice returns the same object.
        try:
            return cls._cache[str(subtype)]
        except KeyError:
            u = object.__new__(cls)
            u._subtype = subtype
            cls._cache[str(subtype)] = u
            return u

    @property
    def subtype(self):
        """
        The dtype of the Interval bounds.
        """
        return self._subtype

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        from pandas.core.arrays import IntervalArray

        return IntervalArray

    @classmethod
    def construct_from_string(cls, string):
        """
        attempt to construct this type from a string, raise a TypeError
        if its not possible
        """
        if not isinstance(string, str):
            raise TypeError(f"a string needs to be passed, got type {type(string)}")

        if string.lower() == "interval" or cls._match.search(string) is not None:
            return cls(string)

        msg = (
            f"Cannot construct a 'IntervalDtype' from '{string}'.\n\n"
            "Incorrectly formatted string passed to constructor. "
            "Valid formats include Interval or Interval[dtype] "
            "where dtype is numeric, datetime, or timedelta"
        )
        raise TypeError(msg)

    @property
    def type(self):
        return Interval

    def __str__(self) -> str_type:
        if self.subtype is None:
            return "interval"
        return f"interval[{self.subtype}]"

    def __hash__(self) -> int:
        # make myself hashable
        return hash(str(self))

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, str):
            # Case-insensitive match against "interval" or "interval[<subtype>]".
            return other.lower() in (self.name.lower(), str(self).lower())
        elif not isinstance(other, IntervalDtype):
            return False
        elif self.subtype is None or other.subtype is None:
            # None should match any subtype
            return True
        else:
            from pandas.core.dtypes.common import is_dtype_equal

            return is_dtype_equal(self.subtype, other.subtype)

    def __setstate__(self, state):
        # for pickle compat. __getstate__ is defined in the
        # PandasExtensionDtype superclass and uses the public properties to
        # pickle -> need to set the settable private ones here (see GH26067)
        self._subtype = state["subtype"]

    @classmethod
    def is_dtype(cls, dtype) -> bool:
        """
        Return a boolean if we if the passed type is an actual dtype that we
        can match (via string or type)
        """

        if isinstance(dtype, str):
            if dtype.lower().startswith("interval"):
                try:
                    return cls.construct_from_string(dtype) is not None
                except (ValueError, TypeError):
                    return False
            else:
                return False
        return super().is_dtype(dtype)

    def __from_arrow__(self, array):
        """Construct IntervalArray from pyarrow Array/ChunkedArray."""
        import pyarrow
        from pandas.core.arrays import IntervalArray

        # Treat a plain Array as a single chunk so both inputs share one path.
        if isinstance(array, pyarrow.Array):
            chunks = [array]
        else:
            chunks = array.chunks

        results = []
        for arr in chunks:
            left = np.asarray(arr.storage.field("left"), dtype=self.subtype)
            right = np.asarray(arr.storage.field("right"), dtype=self.subtype)
            # ``closed`` is read from the type of the whole input ``array``.
            iarr = IntervalArray.from_arrays(left, right, closed=array.type.closed)
            results.append(iarr)

        return IntervalArray._concat_same_type(results)