Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/arrays/period.py : 24%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from datetime import timedelta
2import operator
3from typing import Any, Callable, List, Optional, Sequence, Union
5import numpy as np
7from pandas._libs.tslibs import (
8 NaT,
9 NaTType,
10 frequencies as libfrequencies,
11 iNaT,
12 period as libperiod,
13)
14from pandas._libs.tslibs.fields import isleapyear_arr
15from pandas._libs.tslibs.period import (
16 DIFFERENT_FREQ,
17 IncompatibleFrequency,
18 Period,
19 get_period_field_arr,
20 period_asfreq_arr,
21)
22from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
23from pandas.util._decorators import cache_readonly
25from pandas.core.dtypes.common import (
26 _TD_DTYPE,
27 ensure_object,
28 is_datetime64_dtype,
29 is_float_dtype,
30 is_period_dtype,
31 pandas_dtype,
32)
33from pandas.core.dtypes.dtypes import PeriodDtype
34from pandas.core.dtypes.generic import (
35 ABCIndexClass,
36 ABCPeriodArray,
37 ABCPeriodIndex,
38 ABCSeries,
39)
40from pandas.core.dtypes.missing import isna, notna
42import pandas.core.algorithms as algos
43from pandas.core.arrays import datetimelike as dtl
44import pandas.core.common as com
46from pandas.tseries import frequencies
47from pandas.tseries.offsets import DateOffset, Tick, _delta_to_tick
def _field_accessor(name, alias, docstring=None):
    """
    Build a read-only property that extracts one field from period ordinals.

    Parameters
    ----------
    name : str
        Assigned to the getter's ``__name__``.
    alias : int
        Field code forwarded to ``get_period_field_arr``.
    docstring : str, optional
        Assigned to the getter's ``__doc__``.

    Returns
    -------
    property
    """

    def getter(self):
        # Only the base frequency code is used; the multiple is discarded.
        freq_base, _ = libfrequencies.get_freq_code(self.freq)
        return get_period_field_arr(alias, self.asi8, freq_base)

    getter.__name__ = name
    getter.__doc__ = docstring
    return property(getter)
class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps):
    """
    Pandas ExtensionArray for storing Period data.

    Users should use :func:`period_array` to create new instances.

    Parameters
    ----------
    values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex]
        The data to store. These should be arrays that can be directly
        converted to ordinals without inference or copy (PeriodArray,
        ndarray[int64]), or a box around such an array (Series[period],
        PeriodIndex).
    freq : str or DateOffset
        The `freq` to use for the array. Mostly applicable when `values`
        is an ndarray of integers, when `freq` is required. When `values`
        is a PeriodArray (or box around), it's checked that ``values.freq``
        matches `freq`.
    dtype : PeriodDtype, optional
        A PeriodDtype instance from which to extract a `freq`. If both
        `freq` and `dtype` are specified, then the frequencies must match.
    copy : bool, default False
        Whether to copy the ordinals before storing.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    period_array : Create a new PeriodArray.
    PeriodIndex : Immutable Index for period data.

    Notes
    -----
    There are two components to a PeriodArray

    - ordinals : integer ndarray
    - freq : pd.tseries.offsets.Offset

    The values are physically stored as a 1-D ndarray of integers. These are
    called "ordinals" and represent some kind of offset from a base.

    The `freq` indicates the span covered by each element of the array.
    All elements in the PeriodArray have the same `freq`.
    """

    # array priority higher than numpy scalars
    __array_priority__ = 1000
    _typ = "periodarray"  # ABCPeriodArray
    _scalar_type = Period
    _recognized_scalars = (Period,)
    _is_recognized_dtype = is_period_dtype

    # Names others delegate to us
    _other_ops: List[str] = []
    _bool_ops = ["is_leap_year"]
    _object_ops = ["start_time", "end_time", "freq"]
    _field_ops = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekofyear",
        "weekday",
        "week",
        "dayofweek",
        "dayofyear",
        "quarter",
        "qyear",
        "days_in_month",
        "daysinmonth",
    ]
    _datetimelike_ops = _field_ops + _object_ops + _bool_ops
    _datetimelike_methods = ["strftime", "to_timestamp", "asfreq"]

    # --------------------------------------------------------------------
    # Constructors

    def __init__(self, values, freq=None, dtype=None, copy=False):
        # Reconcile `dtype` and `freq`; raises if both are given and differ.
        freq = validate_dtype_freq(dtype, freq)

        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        # Unwrap Series / PeriodIndex boxes down to the underlying array.
        if isinstance(values, ABCSeries):
            values = values._values
            if not isinstance(values, type(self)):
                raise TypeError("Incorrect dtype")

        elif isinstance(values, ABCPeriodIndex):
            values = values._values

        if isinstance(values, type(self)):
            if freq is not None and freq != values.freq:
                raise raise_on_incompatible(values, freq)
            values, freq = values._data, values.freq

        values = np.array(values, dtype="int64", copy=copy)
        self._data = values
        if freq is None:
            raise ValueError("freq is not specified and cannot be inferred")
        self._dtype = PeriodDtype(freq)

    @classmethod
    def _simple_new(cls, values, freq=None, **kwargs):
        # alias for PeriodArray.__init__
        return cls(values, freq=freq, **kwargs)

    @classmethod
    def _from_sequence(
        cls,
        scalars: Sequence[Optional[Period]],
        dtype: Optional[PeriodDtype] = None,
        copy: bool = False,
    ) -> ABCPeriodArray:
        """
        Construct a PeriodArray from a sequence of Period-like scalars.

        If `scalars` is already an instance of this class it is returned
        (copied when `copy` is True) after checking it against `dtype`.
        Otherwise the frequency comes from `dtype` when given, else it is
        extracted from the data.
        """
        if dtype:
            freq = dtype.freq
        else:
            freq = None

        if isinstance(scalars, cls):
            validate_dtype_freq(scalars.dtype, freq)
            if copy:
                scalars = scalars.copy()
            return scalars

        periods = np.asarray(scalars, dtype=object)
        if copy:
            periods = periods.copy()

        freq = freq or libperiod.extract_freq(periods)
        ordinals = libperiod.extract_ordinals(periods, freq)
        return cls(ordinals, freq=freq)

    @classmethod
    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
        # Delegates directly to `_from_sequence`.
        return cls._from_sequence(strings, dtype, copy)

    @classmethod
    def _from_datetime64(cls, data, freq, tz=None):
        """
        Construct a PeriodArray from a datetime64 array

        Parameters
        ----------
        data : ndarray[datetime64[ns], datetime64[ns, tz]]
        freq : str or Tick
        tz : tzinfo, optional

        Returns
        -------
        PeriodArray[freq]
        """
        data, freq = dt64arr_to_periodarr(data, freq, tz)
        return cls(data, freq=freq)

    @classmethod
    def _generate_range(cls, start, end, periods, freq, fields):
        """
        Build ordinals either from endpoints or from calendar fields.

        Returns
        -------
        subarr : ndarray[int64]
        freq : the frequency that goes with `subarr`

        Raises
        ------
        ValueError
            If both endpoints and fields are supplied, or neither is.
        """
        periods = dtl.validate_periods(periods)

        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        field_count = len(fields)
        if start is not None or end is not None:
            if field_count > 0:
                raise ValueError(
                    "Can either instantiate from fields or endpoints, but not both"
                )
            subarr, freq = _get_ordinal_range(start, end, periods, freq)
        elif field_count > 0:
            subarr, freq = _range_from_fields(freq=freq, **fields)
        else:
            raise ValueError("Not enough parameters to construct Period range")

        return subarr, freq

    # -----------------------------------------------------------------
    # DatetimeLike Interface

    def _unbox_scalar(self, value: Union[Period, NaTType]) -> int:
        """
        Return the integer for `value`: ``NaT.value`` for NaT, otherwise the
        ordinal of a Period after checking it is compatible with this array.

        Raises
        ------
        ValueError
            If `value` is neither NaT nor a Period.
        """
        if value is NaT:
            return value.value
        elif isinstance(value, self._scalar_type):
            if not isna(value):
                self._check_compatible_with(value)
            return value.ordinal
        else:
            raise ValueError(f"'value' should be a Period. Got '{value}' instead.")

    def _scalar_from_string(self, value: str) -> Period:
        # Parse with this array's own frequency.
        return Period(value, freq=self.freq)

    def _check_compatible_with(self, other, setitem: bool = False):
        """
        Raise IncompatibleFrequency unless `other` is NaT or has the same
        ``freqstr`` as this array.
        """
        if other is NaT:
            return
        if self.freqstr != other.freqstr:
            raise raise_on_incompatible(self, other)

    # --------------------------------------------------------------------
    # Data / Attributes

    @cache_readonly
    def dtype(self):
        return self._dtype

    # error: Read-only property cannot override read-write property  [misc]
    @property  # type: ignore
    def freq(self):
        """
        Return the frequency object for this PeriodArray.
        """
        return self.dtype.freq

    def __array__(self, dtype=None) -> np.ndarray:
        # overriding DatetimelikeArray
        # NOTE: the `dtype` argument is not consulted; the result is always
        # an object-dtype array built by iterating over self.
        return np.array(list(self), dtype=object)

    def __arrow_array__(self, type=None):
        """
        Convert myself into a pyarrow Array.
        """
        import pyarrow
        from pandas.core.arrays._arrow_utils import ArrowPeriodType

        if type is not None:
            if pyarrow.types.is_integer(type):
                return pyarrow.array(self._data, mask=self.isna(), type=type)
            elif isinstance(type, ArrowPeriodType):
                # ensure we have the same freq
                if self.freqstr != type.freq:
                    raise TypeError(
                        "Not supported to convert PeriodArray to array with different "
                        f"'freq' ({self.freqstr} vs {type.freq})"
                    )
            else:
                raise TypeError(
                    f"Not supported to convert PeriodArray to '{type}' type"
                )

        period_type = ArrowPeriodType(self.freqstr)
        storage_array = pyarrow.array(self._data, mask=self.isna(), type="int64")
        return pyarrow.ExtensionArray.from_storage(period_type, storage_array)

    # --------------------------------------------------------------------
    # Vectorized analogues of Period properties

    year = _field_accessor(
        "year",
        0,
        """
        The year of the period.
        """,
    )
    month = _field_accessor(
        "month",
        3,
        """
        The month as January=1, December=12.
        """,
    )
    day = _field_accessor(
        "day",
        4,
        """
        The days of the period.
        """,
    )
    hour = _field_accessor(
        "hour",
        5,
        """
        The hour of the period.
        """,
    )
    minute = _field_accessor(
        "minute",
        6,
        """
        The minute of the period.
        """,
    )
    second = _field_accessor(
        "second",
        7,
        """
        The second of the period.
        """,
    )
    weekofyear = _field_accessor(
        "week",
        8,
        """
        The week ordinal of the year.
        """,
    )
    week = weekofyear
    dayofweek = _field_accessor(
        "dayofweek",
        10,
        """
        The day of the week with Monday=0, Sunday=6.
        """,
    )
    weekday = dayofweek
    dayofyear = day_of_year = _field_accessor(
        "dayofyear",
        9,
        """
        The ordinal day of the year.
        """,
    )
    quarter = _field_accessor(
        "quarter",
        2,
        """
        The quarter of the date.
        """,
    )
    qyear = _field_accessor("qyear", 1)
    days_in_month = _field_accessor(
        "days_in_month",
        11,
        """
        The number of days in the month.
        """,
    )
    daysinmonth = days_in_month

    @property
    def is_leap_year(self):
        """
        Logical indicating if the date belongs to a leap year.
        """
        return isleapyear_arr(np.asarray(self.year))

    @property
    def start_time(self):
        # Shorthand for ``to_timestamp(how="start")``.
        return self.to_timestamp(how="start")

    @property
    def end_time(self):
        # Shorthand for ``to_timestamp(how="end")``.
        return self.to_timestamp(how="end")

    def to_timestamp(self, freq=None, how="start"):
        """
        Cast to DatetimeArray/Index.

        Parameters
        ----------
        freq : str or DateOffset, optional
            Target frequency. The default is 'D' for week or longer,
            'S' otherwise.
        how : {'s', 'e', 'start', 'end'}
            Whether to use the start or end of the time period being converted.

        Returns
        -------
        DatetimeArray/Index
        """
        from pandas.core.arrays import DatetimeArray

        how = libperiod._validate_end_alias(how)

        end = how == "E"
        if end:
            # The business-day branch must also be taken when the array's own
            # frequency is "B": stepping one business period forward from a
            # Friday lands on Monday, so "next start minus 1ns" would fall on
            # the weekend instead of the end of the Friday.
            if freq == "B" or self.freq == "B":
                # roll forward to ensure we land on B date
                adjust = Timedelta(1, "D") - Timedelta(1, "ns")
                return self.to_timestamp(how="start") + adjust
            else:
                adjust = Timedelta(1, "ns")
                return (self + self.freq).to_timestamp(how="start") - adjust

        if freq is None:
            base, mult = libfrequencies.get_freq_code(self.freq)
            freq = libfrequencies.get_to_timestamp_base(base)
        else:
            freq = Period._maybe_convert_freq(freq)

        base, mult = libfrequencies.get_freq_code(freq)
        new_data = self.asfreq(freq, how=how)

        new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base)
        return DatetimeArray._from_sequence(new_data, freq="infer")

    # --------------------------------------------------------------------
    # Array-like / EA-Interface Methods

    def _values_for_argsort(self):
        # Sort on the stored ordinals.
        return self._data

    # --------------------------------------------------------------------

    def _time_shift(self, periods, freq=None):
        """
        Shift each value by `periods`.

        Note this is different from ExtensionArray.shift, which
        shifts the *position* of each element, padding the end with
        missing values.

        Parameters
        ----------
        periods : int
            Number of periods to shift by.
        freq : pandas.DateOffset, pandas.Timedelta, or str
            Frequency increment to shift by.
        """
        if freq is not None:
            raise TypeError(
                "`freq` argument is not supported for "
                f"{type(self).__name__}._time_shift"
            )
        values = self.asi8 + periods * self.freq.n
        if self._hasnans:
            # The addition above also moved the missing-value sentinel.
            values[self._isnan] = iNaT
        return type(self)(values, freq=self.freq)

    @property
    def _box_func(self):
        # Callable turning one ordinal into a Period with this array's freq.
        return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq)

    def asfreq(self, freq=None, how="E"):
        """
        Convert the Period Array/Index to the specified frequency `freq`.

        Parameters
        ----------
        freq : str
            A frequency.
        how : str {'E', 'S'}
            Whether the elements should be aligned to the end
            or start within a period.

            * 'E', 'END', or 'FINISH' for end,
            * 'S', 'START', or 'BEGIN' for start.

            January 31st ('END') vs. January 1st ('START') for example.

        Returns
        -------
        Period Array/Index
            Constructed with the new frequency.

        Examples
        --------
        >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A')
        >>> pidx
        PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'],
        dtype='period[A-DEC]', freq='A-DEC')

        >>> pidx.asfreq('M')
        PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12',
        '2015-12'], dtype='period[M]', freq='M')

        >>> pidx.asfreq('M', how='S')
        PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01',
        '2015-01'], dtype='period[M]', freq='M')
        """
        how = libperiod._validate_end_alias(how)

        freq = Period._maybe_convert_freq(freq)

        base1, mult1 = libfrequencies.get_freq_code(self.freq)
        base2, mult2 = libfrequencies.get_freq_code(freq)

        asi8 = self.asi8
        # mult1 can't be negative or 0
        end = how == "E"
        if end:
            ordinal = asi8 + mult1 - 1
        else:
            ordinal = asi8

        new_data = period_asfreq_arr(ordinal, base1, base2, end)

        if self._hasnans:
            new_data[self._isnan] = iNaT

        return type(self)(new_data, freq=freq)

    # ------------------------------------------------------------------
    # Rendering Methods

    def _formatter(self, boxed=False):
        # Plain ``str`` when boxed, otherwise wrap the value in single quotes.
        if boxed:
            return str
        return "'{}'".format

    def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):
        """
        actually format my specific types
        """
        values = self.astype(object)

        if date_format:
            formatter = lambda dt: dt.strftime(date_format)
        else:
            formatter = lambda dt: str(dt)

        if self._hasnans:
            # Missing entries get `na_rep`; only the rest are formatted.
            mask = self._isnan
            values[mask] = na_rep
            imask = ~mask
            values[imask] = np.array([formatter(dt) for dt in values[imask]])
        else:
            values = np.array([formatter(dt) for dt in values])
        return values

    # ------------------------------------------------------------------

    def astype(self, dtype, copy=True):
        # We handle Period[T] -> Period[U]
        # Our parent handles everything else.
        dtype = pandas_dtype(dtype)

        if is_period_dtype(dtype):
            return self.asfreq(dtype.freq)
        return super().astype(dtype, copy=copy)

    # ------------------------------------------------------------------
    # Arithmetic Methods

    def _sub_datelike(self, other):
        assert other is not NaT
        return NotImplemented

    def _sub_period(self, other):
        # If the operation is well-defined, we return an object-Index
        # of DateOffsets. Null entries are filled with pd.NaT
        self._check_compatible_with(other)
        asi8 = self.asi8
        new_data = asi8 - other.ordinal
        new_data = np.array([self.freq * x for x in new_data])

        if self._hasnans:
            new_data[self._isnan] = NaT

        return new_data

    def _addsub_int_array(
        self, other: np.ndarray, op: Callable[[Any, Any], Any],
    ) -> "PeriodArray":
        """
        Add or subtract array of integers; equivalent to applying
        `_time_shift` pointwise.

        Parameters
        ----------
        other : np.ndarray[integer-dtype]
        op : {operator.add, operator.sub}

        Returns
        -------
        result : PeriodArray
        """

        assert op in [operator.add, operator.sub]
        if op is operator.sub:
            other = -other
        res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan)
        res_values = res_values.view("i8")
        res_values[self._isnan] = iNaT
        return type(self)(res_values, freq=self.freq)

    def _add_offset(self, other):
        """
        Add a non-Tick DateOffset whose base alias matches this array's
        ``freq.rule_code``; otherwise raise IncompatibleFrequency.
        """
        assert not isinstance(other, Tick)
        base = libfrequencies.get_base_alias(other.rule_code)
        if base != self.freq.rule_code:
            raise raise_on_incompatible(self, other)

        # Note: when calling parent class's _add_timedeltalike_scalar,
        #  it will call delta_to_nanoseconds(delta).  Because delta here
        #  is an integer, delta_to_nanoseconds will return it unchanged.
        result = super()._add_timedeltalike_scalar(other.n)
        return type(self)(result, freq=self.freq)

    def _add_timedeltalike_scalar(self, other):
        """
        Parameters
        ----------
        other : timedelta, Tick, np.timedelta64

        Returns
        -------
        result : ndarray[int64]
        """
        assert isinstance(self.freq, Tick)  # checked by calling function
        assert isinstance(other, (timedelta, np.timedelta64, Tick))

        if notna(other):
            # special handling for np.timedelta64("NaT"), avoid calling
            #  _check_timedeltalike_freq_compat as that would raise TypeError
            other = self._check_timedeltalike_freq_compat(other)

        # Note: when calling parent class's _add_timedeltalike_scalar,
        #  it will call delta_to_nanoseconds(delta).  Because delta here
        #  is an integer, delta_to_nanoseconds will return it unchanged.
        ordinals = super()._add_timedeltalike_scalar(other)
        return ordinals

    def _add_delta_tdi(self, other):
        """
        Parameters
        ----------
        other : TimedeltaArray or ndarray[timedelta64]

        Returns
        -------
        result : ndarray[int64]
        """
        assert isinstance(self.freq, Tick)  # checked by calling function

        if not np.all(isna(other)):
            delta = self._check_timedeltalike_freq_compat(other)
        else:
            # all-NaT TimedeltaIndex is equivalent to a single scalar td64 NaT
            return self + np.timedelta64("NaT")

        return self._addsub_int_array(delta, operator.add).asi8

    def _add_delta(self, other):
        """
        Add a timedelta-like, Tick, or TimedeltaIndex-like object
        to self, yielding a new PeriodArray

        Parameters
        ----------
        other : {timedelta, np.timedelta64, Tick,
                 TimedeltaIndex, ndarray[timedelta64]}

        Returns
        -------
        result : PeriodArray
        """
        if not isinstance(self.freq, Tick):
            # We cannot add timedelta-like to non-tick PeriodArray
            raise raise_on_incompatible(self, other)

        new_ordinals = super()._add_delta(other)
        return type(self)(new_ordinals, freq=self.freq)

    def _check_timedeltalike_freq_compat(self, other):
        """
        Arithmetic operations with timedelta-like scalars or array `other`
        are only valid if `other` is an integer multiple of `self.freq`.
        If the operation is valid, find that integer multiple.  Otherwise,
        raise because the operation is invalid.

        Parameters
        ----------
        other : timedelta, np.timedelta64, Tick,
                ndarray[timedelta64], TimedeltaArray, TimedeltaIndex

        Returns
        -------
        multiple : int or ndarray[int64]

        Raises
        ------
        IncompatibleFrequency
        """
        assert isinstance(self.freq, Tick)  # checked by calling function
        own_offset = frequencies.to_offset(self.freq.rule_code)
        base_nanos = delta_to_nanoseconds(own_offset)

        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            nanos = delta_to_nanoseconds(other)

        elif isinstance(other, np.ndarray):
            # numpy timedelta64 array; all entries must be compatible
            assert other.dtype.kind == "m"
            if other.dtype != _TD_DTYPE:
                # i.e. non-nano unit
                # TODO: disallow unit-less timedelta64
                other = other.astype(_TD_DTYPE)
            nanos = other.view("i8")
        else:
            # TimedeltaArray/Index
            nanos = other.asi8

        if np.all(nanos % base_nanos == 0):
            # nanos being added is an integer multiple of the
            #  base-frequency to self.freq
            delta = nanos // base_nanos
            # delta is the integer (or integer-array) number of periods
            # by which will be added to self.
            return delta

        raise raise_on_incompatible(self, other)
def raise_on_incompatible(left, right):
    """
    Build (but do not raise) an IncompatibleFrequency with a uniform message.

    Parameters
    ----------
    left : PeriodArray
    right : None, DateOffset, Period, ndarray, or timedelta-like

    Returns
    -------
    IncompatibleFrequency
        Exception to be raised by the caller.
    """
    # GH#24283 error message format depends on whether right is scalar
    if right is None or isinstance(right, np.ndarray):
        # No single frequency can be reported for these.
        described_freq = None
    elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, DateOffset)):
        described_freq = right.freqstr
    else:
        # Anything else is treated as timedelta-like and turned into a Tick.
        described_freq = _delta_to_tick(Timedelta(right)).freqstr

    message = DIFFERENT_FREQ.format(
        cls=type(left).__name__, own_freq=left.freqstr, other_freq=described_freq
    )
    return IncompatibleFrequency(message)
790# -------------------------------------------------------------------
791# Constructor Helpers
def period_array(
    data: Sequence[Optional[Period]],
    freq: Optional[Union[str, Tick]] = None,
    copy: bool = False,
) -> PeriodArray:
    """
    Construct a new PeriodArray from a sequence of Period scalars.

    Parameters
    ----------
    data : Sequence of Period objects
        A sequence of Period objects. These are required to all have
        the same ``freq.`` Missing values can be indicated by ``None``
        or ``pandas.NaT``.
    freq : str, Tick, or Offset
        The frequency of every element of the array. This can be specified
        to avoid inferring the `freq` from `data`.
    copy : bool, default False
        Whether to ensure a copy of the data is made.

    Returns
    -------
    PeriodArray

    Raises
    ------
    TypeError
        If `data` is a non-empty floating point array.

    See Also
    --------
    PeriodArray
    pandas.PeriodIndex

    Examples
    --------
    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A')])
    <PeriodArray>
    ['2017', '2018']
    Length: 2, dtype: period[A-DEC]

    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A'),
    ...               pd.NaT])
    <PeriodArray>
    ['2017', '2018', 'NaT']
    Length: 3, dtype: period[A-DEC]

    Integers that look like years are handled

    >>> period_array([2000, 2001, 2002], freq='D')
    <PeriodArray>
    ['2000-01-01', '2001-01-01', '2002-01-01']
    Length: 3, dtype: period[D]

    Datetime-like strings may also be passed

    >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q')
    <PeriodArray>
    ['2000Q1', '2000Q2', '2000Q3', '2000Q4']
    Length: 4, dtype: period[Q-DEC]
    """
    if is_datetime64_dtype(data):
        # The conversion to ordinals already produces a fresh array.
        return PeriodArray._from_datetime64(data, freq)
    if isinstance(data, (ABCPeriodIndex, ABCSeries, PeriodArray)):
        # Forward `copy` so the documented parameter is honoured; the
        # default (False) keeps the previous no-copy behaviour.
        return PeriodArray(data, freq, copy=copy)

    # other iterable of some kind
    if not isinstance(data, (np.ndarray, list, tuple)):
        data = list(data)

    data = np.asarray(data)

    dtype: Optional[PeriodDtype]
    if freq:
        dtype = PeriodDtype(freq)
    else:
        dtype = None

    if is_float_dtype(data) and len(data) > 0:
        raise TypeError("PeriodIndex does not allow floating point in construction")

    data = ensure_object(data)

    return PeriodArray._from_sequence(data, dtype=dtype, copy=copy)
def validate_dtype_freq(dtype, freq):
    """
    Reconcile an optional dtype with an optional freq.

    When both are given they must agree; when only `dtype` is given its
    frequency is returned.

    Parameters
    ----------
    dtype : dtype
    freq : DateOffset or None

    Returns
    -------
    freq : DateOffset

    Raises
    ------
    ValueError : non-period dtype
    IncompatibleFrequency : mismatch between dtype and freq
    """
    if freq is not None:
        freq = frequencies.to_offset(freq)

    # Nothing to reconcile without a dtype.
    if dtype is None:
        return freq

    dtype = pandas_dtype(dtype)
    if not is_period_dtype(dtype):
        raise ValueError("dtype must be PeriodDtype")

    if freq is None:
        return dtype.freq
    if freq != dtype.freq:
        raise IncompatibleFrequency("specified freq and dtype are different")
    return freq
def dt64arr_to_periodarr(data, freq, tz=None):
    """
    Convert a datetime-like array to Period ordinals.

    Parameters
    ----------
    data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]]
    freq : Optional[Union[str, Tick]]
        Must match the `freq` on the `data` if `data` is a DatetimeIndex
        or Series.
    tz : Optional[tzinfo]

    Returns
    -------
    ordinals : ndarray[int]
    freq : Tick
        The frequency extracted from the Series or DatetimeIndex if that's
        used.

    Raises
    ------
    ValueError
        If ``data.dtype`` is not ``M8[ns]``.
    """
    if data.dtype != np.dtype("M8[ns]"):
        raise ValueError(f"Wrong dtype: {data.dtype}")

    if freq is None:
        # Borrow the frequency carried by the box, then unwrap it.
        if isinstance(data, ABCIndexClass):
            freq = data.freq
            data = data._values
        elif isinstance(data, ABCSeries):
            freq = data.dt.freq
            data = data._values

    freq = Period._maybe_convert_freq(freq)

    # An explicit `freq` skips the branch above, so unwrap here as well.
    if isinstance(data, (ABCIndexClass, ABCSeries)):
        data = data._values

    base_code, _ = libfrequencies.get_freq_code(freq)
    ordinals = libperiod.dt64arr_to_periodarr(data.view("i8"), base_code, tz)
    return ordinals, freq
def _get_ordinal_range(start, end, periods, freq, mult=1):
    """
    Compute the int64 ordinals for a range given two of start/end/periods.

    Parameters
    ----------
    start, end : Period-like or None
        Converted with ``Period(value, freq)`` when not None.
    periods : int or None
    freq : frequency or None
        When None it is taken from `start`, else from `end`.
    mult : int, default 1
        Step between ordinals; replaced by the multiple from `freq` when
        `freq` is given.

    Returns
    -------
    data : ndarray[int64]
    freq

    Raises
    ------
    ValueError
        If not exactly two of start/end/periods are given, if the endpoint
        frequencies differ, if an endpoint is NaT, or if no frequency can
        be inferred.
    """
    n_given = com.count_not_none(start, end, periods)
    if n_given != 2:
        raise ValueError(
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )

    if freq is not None:
        mult = libfrequencies.get_freq_code(freq)[1]

    start = Period(start, freq) if start is not None else start
    end = Period(end, freq) if end is not None else end

    start_is_period = isinstance(start, Period)
    end_is_period = isinstance(end, Period)

    if start_is_period and end_is_period and start.freq != end.freq:
        raise ValueError("start and end must have same freq")
    if start is NaT or end is NaT:
        raise ValueError("start and end must not be NaT")

    if freq is None:
        if start_is_period:
            freq = start.freq
        elif end_is_period:
            freq = end.freq
        else:  # pragma: no cover
            raise ValueError("Could not infer freq from start/end")

    # Work out the half-open [first, stop) bounds, then build the range once.
    if periods is None:
        first, stop = start.ordinal, end.ordinal + 1
    else:
        periods = periods * mult
        if start is None:
            first, stop = end.ordinal - periods + mult, end.ordinal + 1
        else:
            first, stop = start.ordinal, start.ordinal + periods

    data = np.arange(first, stop, mult, dtype=np.int64)
    return data, freq
def _range_from_fields(
    year=None,
    month=None,
    quarter=None,
    day=None,
    hour=None,
    minute=None,
    second=None,
    freq=None,
):
    """
    Compute period ordinals from calendar field values.

    Missing ``hour``/``minute``/``second`` default to 0 and ``day`` to 1.
    When ``quarter`` is given the ordinals are built from year/quarter
    pairs; otherwise from year/month/day/hour/minute/second.

    Returns
    -------
    ordinals : ndarray[int64]
    freq
        The frequency passed in, or ``"Q"`` when `quarter` is given
        without a `freq`.

    Raises
    ------
    AssertionError
        If `quarter` is given with a `freq` whose base is not quarterly.
    """
    hour = 0 if hour is None else hour
    minute = 0 if minute is None else minute
    second = 0 if second is None else second
    day = 1 if day is None else day

    quarterly_base = libfrequencies.FreqGroup.FR_QTR if quarter is not None else None

    if quarter is not None:
        if freq is None:
            freq = "Q"
            base = quarterly_base
        else:
            base, _ = libfrequencies.get_freq_code(freq)
            if base != quarterly_base:
                raise AssertionError("base must equal FR_QTR")

        year, quarter = _make_field_arrays(year, quarter)
        ordinals = []
        for yr, qtr in zip(year, quarter):
            yr, mth = libperiod.quarter_to_myear(yr, qtr, freq)
            ordinals.append(libperiod.period_ordinal(yr, mth, 1, 1, 1, 1, 0, 0, base))
    else:
        base, _ = libfrequencies.get_freq_code(freq)
        field_arrays = _make_field_arrays(year, month, day, hour, minute, second)
        ordinals = [
            libperiod.period_ordinal(yr, mth, d, h, mn, s, 0, 0, base)
            for yr, mth, d, h, mn, s in zip(*field_arrays)
        ]

    return np.array(ordinals, dtype=np.int64), freq
def _make_field_arrays(*fields):
    """
    Turn a mix of array-like and scalar fields into equal-length arrays.

    Lists, ndarrays and Series are passed through ``np.asarray``; every
    other value is repeated to the common length of the array-like fields.

    Raises
    ------
    ValueError
        If the array-like fields do not all have the same length.
    """
    array_like = (list, np.ndarray, ABCSeries)

    common_len = None
    for field in fields:
        if not isinstance(field, array_like):
            continue
        if common_len is None:
            common_len = len(field)
        elif len(field) != common_len:
            raise ValueError("Mismatched Period array lengths")

    return [
        np.asarray(field) if isinstance(field, array_like) else np.repeat(field, common_len)
        for field in fields
    ]