Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/indexes/base.py : 22%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from datetime import datetime
2import operator
3from textwrap import dedent
4from typing import Dict, FrozenSet, Hashable, Optional, Union
5import warnings
7import numpy as np
9from pandas._libs import algos as libalgos, index as libindex, lib
10import pandas._libs.join as libjoin
11from pandas._libs.lib import is_datetime_array
12from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp
13from pandas._libs.tslibs.period import IncompatibleFrequency
14from pandas._libs.tslibs.timezones import tz_compare
15from pandas.compat import set_function_name
16from pandas.compat.numpy import function as nv
17from pandas.util._decorators import Appender, Substitution, cache_readonly
19from pandas.core.dtypes import concat as _concat
20from pandas.core.dtypes.cast import maybe_cast_to_integer_array
21from pandas.core.dtypes.common import (
22 ensure_categorical,
23 ensure_int64,
24 ensure_object,
25 ensure_platform_int,
26 is_bool,
27 is_bool_dtype,
28 is_categorical,
29 is_categorical_dtype,
30 is_datetime64_any_dtype,
31 is_datetime64tz_dtype,
32 is_dtype_equal,
33 is_extension_array_dtype,
34 is_float,
35 is_float_dtype,
36 is_hashable,
37 is_integer,
38 is_integer_dtype,
39 is_interval_dtype,
40 is_iterator,
41 is_list_like,
42 is_object_dtype,
43 is_period_dtype,
44 is_scalar,
45 is_signed_integer_dtype,
46 is_timedelta64_dtype,
47 is_unsigned_integer_dtype,
48)
49from pandas.core.dtypes.concat import concat_compat
50from pandas.core.dtypes.generic import (
51 ABCCategorical,
52 ABCDataFrame,
53 ABCDatetimeArray,
54 ABCDatetimeIndex,
55 ABCIndexClass,
56 ABCIntervalIndex,
57 ABCMultiIndex,
58 ABCPandasArray,
59 ABCPeriodIndex,
60 ABCSeries,
61 ABCTimedeltaIndex,
62)
63from pandas.core.dtypes.missing import array_equivalent, isna
65from pandas.core import ops
66from pandas.core.accessor import CachedAccessor
67import pandas.core.algorithms as algos
68from pandas.core.arrays import ExtensionArray
69from pandas.core.base import IndexOpsMixin, PandasObject
70import pandas.core.common as com
71from pandas.core.construction import extract_array
72from pandas.core.indexers import deprecate_ndim_indexing, maybe_convert_indices
73from pandas.core.indexes.frozen import FrozenList
74import pandas.core.missing as missing
75from pandas.core.ops import get_op_result_name
76from pandas.core.ops.invalid import make_invalid_op
77from pandas.core.strings import StringMethods
79from pandas.io.formats.printing import (
80 default_pprint,
81 format_object_attrs,
82 format_object_summary,
83 pprint_thing,
84)
__all__ = ["Index"]

# Inferred-type categories that cannot be meaningfully sorted.
_unsortable_types = frozenset(("mixed", "mixed-integer"))

# Default substitutions for the shared docstring templates defined below.
_index_doc_kwargs = {
    "klass": "Index",
    "inplace": "",
    "target_klass": "Index",
    "raises_section": "",
    "unique": "Index",
    "duplicated": "np.ndarray",
}

# Populated throughout the module with docstring templates applied
# via @Appender.
_index_shared_docs = {}
def _make_comparison_op(op, cls):
    """
    Build a rich-comparison method (e.g. ``__eq__``) for *cls*.

    Parameters
    ----------
    op : callable
        Binary comparison operator (e.g. ``operator.eq``).
    cls : type
        The Index subclass the method will be attached to.

    Returns
    -------
    callable
        The comparison method, renamed to ``__{op.__name__}__``.
    """

    def cmp_method(self, other):
        # Array-like operands must match in length; scalars (ndim == 0)
        # are exempt from the check.
        if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)):
            if other.ndim > 0 and len(self) != len(other):
                raise ValueError("Lengths must match to compare")

        if is_object_dtype(self) and isinstance(other, ABCCategorical):
            # Wrap our values in a Categorical of the other's dtype so the
            # comparison dispatches to Categorical semantics.
            left = type(other)(self._values, dtype=other.dtype)
            return op(left, other)
        elif is_object_dtype(self) and isinstance(other, ExtensionArray):
            # e.g. PeriodArray
            with np.errstate(all="ignore"):
                result = op(self.values, other)

        elif is_object_dtype(self) and not isinstance(self, ABCMultiIndex):
            # don't pass MultiIndex
            with np.errstate(all="ignore"):
                result = ops.comp_method_OBJECT_ARRAY(op, self.values, other)

        else:
            with np.errstate(all="ignore"):
                result = op(self.values, np.asarray(other))

        if is_bool_dtype(result):
            return result
        # A non-boolean result signals an invalid comparison for these
        # operands; defer to the shared invalid-comparison handling.
        return ops.invalid_comparison(self, other, op)

    name = f"__{op.__name__}__"
    return set_function_name(cmp_method, name, cls)
def _make_arithmetic_op(op, cls):
    """
    Build an arithmetic dunder method (e.g. ``__add__``) for *cls*.

    The generated method defers to Series arithmetic and re-wraps the
    result as an Index (or a pair of Indexes for divmod-style ops).
    """

    def index_arithmetic_method(self, other):
        deferred_types = (ABCSeries, ABCDataFrame, ABCTimedeltaIndex)
        if isinstance(other, deferred_types):
            # Let the other operand's (reflected) method handle it.
            return NotImplemented

        from pandas import Series

        result = op(Series(self), other)
        if isinstance(result, tuple):
            # e.g. divmod returns (quotient, remainder)
            quotient, remainder = result
            return Index(quotient), Index(remainder)
        return Index(result)

    method_name = f"__{op.__name__}__"
    # TODO: docstring?
    return set_function_name(index_arithmetic_method, method_name, cls)
class InvalidIndexError(Exception):
    """Raised on indexing operations that are invalid for this Index."""

    pass
# Cached object dtype, compared against user-supplied dtypes below.
_o_dtype = np.dtype(object)
# Factory for unique sentinel objects assigned to Index._id; identity of
# these markers backs the `Index.is_` check.
_Identity = object
def _new_Index(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.

    Parameters
    ----------
    cls : type
        The Index subclass being reconstructed.
    d : dict
        Keyword arguments captured when the object was pickled.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ GH #13277
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes"
            d["codes"] = d.pop("labels")

    return cls.__new__(cls, **d)
177class Index(IndexOpsMixin, PandasObject):
178 """
179 Immutable ndarray implementing an ordered, sliceable set. The basic object
180 storing axis labels for all pandas objects.
182 Parameters
183 ----------
184 data : array-like (1-dimensional)
185 dtype : NumPy dtype (default: object)
186 If dtype is None, we find the dtype that best fits the data.
187 If an actual dtype is provided, we coerce to that dtype if it's safe.
188 Otherwise, an error will be raised.
189 copy : bool
190 Make a copy of input ndarray.
191 name : object
192 Name to be stored in the index.
193 tupleize_cols : bool (default: True)
194 When True, attempt to create a MultiIndex if possible.
196 See Also
197 --------
198 RangeIndex : Index implementing a monotonic integer range.
199 CategoricalIndex : Index of :class:`Categorical` s.
200 MultiIndex : A multi-level, or hierarchical, Index.
201 IntervalIndex : An Index of :class:`Interval` s.
202 DatetimeIndex, TimedeltaIndex, PeriodIndex
203 Int64Index, UInt64Index, Float64Index
205 Notes
206 -----
207 An Index instance can **only** contain hashable objects
209 Examples
210 --------
211 >>> pd.Index([1, 2, 3])
212 Int64Index([1, 2, 3], dtype='int64')
214 >>> pd.Index(list('abc'))
215 Index(['a', 'b', 'c'], dtype='object')
216 """
    # tolist is not actually deprecated, just suppressed in the __dir__
    _deprecations: FrozenSet[str] = (
        PandasObject._deprecations
        | IndexOpsMixin._deprecations
        | frozenset(["contains", "set_value"])
    )

    # To hand over control to subclasses
    _join_precedence = 1

    # Cython methods; see github.com/cython/cython/issues/2647
    # for why we need to wrap these instead of making them class attributes
    # Moreover, cython will choose the appropriate-dtyped sub-function
    # given the dtypes of the passed arguments
    def _left_indexer_unique(self, left, right):
        # Indexer for a left join where `left` is known to be unique.
        return libjoin.left_join_indexer_unique(left, right)

    def _left_indexer(self, left, right):
        # Indexer for a left join (duplicates allowed).
        return libjoin.left_join_indexer(left, right)

    def _inner_indexer(self, left, right):
        # Indexer for an inner join.
        return libjoin.inner_join_indexer(left, right)

    def _outer_indexer(self, left, right):
        # Indexer for an outer join.
        return libjoin.outer_join_indexer(left, right)
    _typ = "index"
    # Underlying data store; annotated as ndarray or ExtensionArray.
    _data: Union[ExtensionArray, np.ndarray]
    # Identity marker set by _reset_identity; compared in `is_`.
    _id = None
    _name: Optional[Hashable] = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    _comparables = ["name"]
    _attributes = ["name"]
    _is_numeric_dtype = False
    _can_hold_na = True

    # would we like our indexing holder to defer to us
    _defer_to_indexing = False

    # prioritize current class for _shallow_copy_with_infer,
    # used to infer integers as datetime-likes
    _infer_as_myclass = False

    _engine_type = libindex.ObjectEngine
    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    _accessors = {"str"}

    # Lazily-created .str accessor (e.g. ``idx.str.upper()``).
    str = CachedAccessor("str", StringMethods)

    # --------------------------------------------------------------------
    # Constructors
276 def __new__(
277 cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs,
278 ) -> "Index":
280 from pandas.core.indexes.range import RangeIndex
282 name = maybe_extract_name(name, data, cls)
284 if isinstance(data, ABCPandasArray):
285 # ensure users don't accidentally put a PandasArray in an index.
286 data = data.to_numpy()
288 # range
289 if isinstance(data, RangeIndex):
290 return RangeIndex(start=data, copy=copy, dtype=dtype, name=name)
291 elif isinstance(data, range):
292 return RangeIndex.from_range(data, dtype=dtype, name=name)
294 # categorical
295 elif is_categorical_dtype(data) or is_categorical_dtype(dtype):
296 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
297 from pandas.core.indexes.category import CategoricalIndex
299 return CategoricalIndex(data, dtype=dtype, copy=copy, name=name, **kwargs)
301 # interval
302 elif is_interval_dtype(data) or is_interval_dtype(dtype):
303 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
304 from pandas.core.indexes.interval import IntervalIndex
306 closed = kwargs.pop("closed", None)
307 if is_dtype_equal(_o_dtype, dtype):
308 return IntervalIndex(
309 data, name=name, copy=copy, closed=closed, **kwargs
310 ).astype(object)
311 return IntervalIndex(
312 data, dtype=dtype, name=name, copy=copy, closed=closed, **kwargs
313 )
315 elif (
316 is_datetime64_any_dtype(data)
317 or is_datetime64_any_dtype(dtype)
318 or "tz" in kwargs
319 ):
320 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
321 from pandas import DatetimeIndex
323 if is_dtype_equal(_o_dtype, dtype):
324 # GH#23524 passing `dtype=object` to DatetimeIndex is invalid,
325 # will raise in the where `data` is already tz-aware. So
326 # we leave it out of this step and cast to object-dtype after
327 # the DatetimeIndex construction.
328 # Note we can pass copy=False because the .astype below
329 # will always make a copy
330 return DatetimeIndex(data, copy=False, name=name, **kwargs).astype(
331 object
332 )
333 else:
334 return DatetimeIndex(data, copy=copy, name=name, dtype=dtype, **kwargs)
336 elif is_timedelta64_dtype(data) or is_timedelta64_dtype(dtype):
337 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
338 from pandas import TimedeltaIndex
340 if is_dtype_equal(_o_dtype, dtype):
341 # Note we can pass copy=False because the .astype below
342 # will always make a copy
343 return TimedeltaIndex(data, copy=False, name=name, **kwargs).astype(
344 object
345 )
346 else:
347 return TimedeltaIndex(data, copy=copy, name=name, dtype=dtype, **kwargs)
349 elif is_period_dtype(data) or is_period_dtype(dtype):
350 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
351 from pandas import PeriodIndex
353 if is_dtype_equal(_o_dtype, dtype):
354 return PeriodIndex(data, copy=False, name=name, **kwargs).astype(object)
355 return PeriodIndex(data, dtype=dtype, copy=copy, name=name, **kwargs)
357 # extension dtype
358 elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype):
359 if not (dtype is None or is_object_dtype(dtype)):
360 # coerce to the provided dtype
361 ea_cls = dtype.construct_array_type()
362 data = ea_cls._from_sequence(data, dtype=dtype, copy=False)
363 else:
364 data = np.asarray(data, dtype=object)
366 # coerce to the object dtype
367 data = data.astype(object)
368 return Index(data, dtype=object, copy=copy, name=name, **kwargs)
370 # index-like
371 elif isinstance(data, (np.ndarray, Index, ABCSeries)):
372 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
373 from pandas.core.indexes.numeric import (
374 Float64Index,
375 Int64Index,
376 UInt64Index,
377 )
379 if dtype is not None:
380 # we need to avoid having numpy coerce
381 # things that look like ints/floats to ints unless
382 # they are actually ints, e.g. '0' and 0.0
383 # should not be coerced
384 # GH 11836
385 data = _maybe_cast_with_dtype(data, dtype, copy)
386 dtype = data.dtype # TODO: maybe not for object?
388 # maybe coerce to a sub-class
389 if is_signed_integer_dtype(data.dtype):
390 return Int64Index(data, copy=copy, dtype=dtype, name=name)
391 elif is_unsigned_integer_dtype(data.dtype):
392 return UInt64Index(data, copy=copy, dtype=dtype, name=name)
393 elif is_float_dtype(data.dtype):
394 return Float64Index(data, copy=copy, dtype=dtype, name=name)
395 elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data):
396 subarr = data.astype("object")
397 else:
398 subarr = com.asarray_tuplesafe(data, dtype=object)
400 # asarray_tuplesafe does not always copy underlying data,
401 # so need to make sure that this happens
402 if copy:
403 subarr = subarr.copy()
405 if dtype is None:
406 new_data, new_dtype = _maybe_cast_data_without_dtype(subarr)
407 if new_dtype is not None:
408 return cls(
409 new_data, dtype=new_dtype, copy=False, name=name, **kwargs
410 )
412 if kwargs:
413 raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
414 if subarr.ndim > 1:
415 # GH#13601, GH#20285, GH#27125
416 raise ValueError("Index data must be 1-dimensional")
417 return cls._simple_new(subarr, name, **kwargs)
419 elif hasattr(data, "__array__"):
420 return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
421 elif data is None or is_scalar(data):
422 raise cls._scalar_data_error(data)
423 else:
424 if tupleize_cols and is_list_like(data):
425 # GH21470: convert iterable to list before determining if empty
426 if is_iterator(data):
427 data = list(data)
429 if data and all(isinstance(e, tuple) for e in data):
430 # we must be all tuples, otherwise don't construct
431 # 10697
432 from pandas.core.indexes.multi import MultiIndex
434 return MultiIndex.from_tuples(
435 data, names=name or kwargs.get("names")
436 )
437 # other iterable of some kind
438 subarr = com.asarray_tuplesafe(data, dtype=object)
439 return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
    # Developer note kept as a bare class-level string (no runtime effect).
    """
    NOTE for new Index creation:

    - _simple_new: It returns new Index with the same type as the caller.
      All metadata (such as name) must be provided by caller's responsibility.
      Using _shallow_copy is recommended because it fills these metadata
      otherwise specified.

    - _shallow_copy: It returns new Index with the same type (using
      _simple_new), but fills caller's metadata otherwise specified. Passed
      kwargs will overwrite corresponding metadata.

    - _shallow_copy_with_infer: It returns new Index inferring its type
      from passed values. It fills caller's metadata otherwise specified as the
      same as _shallow_copy.

    See each method's docstring.
    """
    @property
    def asi8(self):
        """
        Integer representation of the values.

        Returns
        -------
        ndarray
            An ndarray with int64 dtype.

        Notes
        -----
        The base Index always returns None here; subclasses with an
        integer-backed representation are expected to override this.
        """
        return None
    @classmethod
    def _simple_new(cls, values, name=None, dtype=None):
        """
        We require that we have a dtype compat for the values. If we are passed
        a non-dtype compat, then coerce using the constructor.

        Must be careful not to recurse.
        """
        if isinstance(values, (ABCSeries, ABCIndexClass)):
            # Index._data must always be an ndarray.
            # This is no-copy for when _values is an ndarray,
            # which should be always at this point.
            values = np.asarray(values._values)

        # Bypass __new__/__init__ entirely; we set the attributes directly.
        result = object.__new__(cls)
        result._data = values
        # _index_data is a (temporary?) fix to ensure that the direct data
        # manipulation we do in `_libs/reduction.pyx` continues to work.
        # We need access to the actual ndarray, since we're messing with
        # data buffers and strides. We don't re-use `_ndarray_values`, since
        # we actually set this value too.
        result._index_data = values
        result._name = name

        # Give the new index a fresh identity marker for `is_` checks.
        return result._reset_identity()
    @cache_readonly
    def _constructor(self):
        # Constructor used when rebuilding an index of the same type.
        return type(self)
502 # --------------------------------------------------------------------
503 # Index Internals Methods
505 def _get_attributes_dict(self):
506 """
507 Return an attributes dict for my class.
508 """
509 return {k: getattr(self, k, None) for k in self._attributes}
    # Docstring template applied to _shallow_copy via @Appender below.
    _index_shared_docs[
        "_shallow_copy"
    ] = """
        Create a new Index with the same class as the caller, don't copy the
        data, use the same object attributes with passed in attributes taking
        precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        kwargs : updates the default attributes for this Index
        """
    @Appender(_index_shared_docs["_shallow_copy"])
    def _shallow_copy(self, values=None, **kwargs):
        if values is None:
            values = self.values

        attributes = self._get_attributes_dict()
        attributes.update(kwargs)
        if not len(values) and "dtype" not in kwargs:
            # Preserve our dtype for empty results, which would otherwise
            # be re-inferred.
            attributes["dtype"] = self.dtype

        # _simple_new expects the type of self._data
        values = getattr(values, "_values", values)
        if isinstance(values, ABCDatetimeArray):
            # `self.values` returns `self` for tz-aware, so we need to unwrap
            # more specifically
            values = values.asi8

        return self._simple_new(values, **attributes)
    def _shallow_copy_with_infer(self, values, **kwargs):
        """
        Create a new Index inferring the class with passed value, don't copy
        the data, use the same object attributes with passed in attributes
        taking precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        kwargs : updates the default attributes for this Index
        """
        attributes = self._get_attributes_dict()
        attributes.update(kwargs)
        attributes["copy"] = False
        if not len(values) and "dtype" not in kwargs:
            attributes["dtype"] = self.dtype
        if self._infer_as_myclass:
            try:
                # Subclass opted in (_infer_as_myclass): try to rebuild as
                # our own class first.
                return self._constructor(values, **attributes)
            except (TypeError, ValueError):
                pass
        # Fall back to type inference via the base Index constructor.
        return Index(values, **attributes)
    def _update_inplace(self, result, **kwargs):
        """Indexes are immutable; unconditionally raise TypeError."""
        # guard when called from IndexOpsMixin
        raise TypeError("Index can't be updated inplace")
    def is_(self, other) -> bool:
        """
        More flexible, faster check like ``is`` but that works through views.

        Note: this is *not* the same as ``Index.identical()``, which checks
        that metadata is also the same.

        Parameters
        ----------
        other : object
            other object to compare against.

        Returns
        -------
        True if both have same underlying data, False otherwise : bool
        """
        # use something other than None to be clearer
        # The Ellipsis default guarantees that an `other` without an `_id`
        # attribute never compares equal.
        return self._id is getattr(other, "_id", Ellipsis) and self._id is not None
    def _reset_identity(self):
        """
        Initializes or resets ``_id`` attribute with new object.
        """
        # Each marker object is unique, so only views sharing this exact
        # object will satisfy `is_`.
        self._id = _Identity()
        return self
    def _cleanup(self):
        # Release the engine's cached hash-table mapping.
        self._engine.clear_mapping()
    @cache_readonly
    def _engine(self):
        # property, for now, slow to look up

        # to avoid a reference cycle, bind `_ndarray_values` to a local variable, so
        # `self` is not passed into the lambda.
        _ndarray_values = self._ndarray_values
        return self._engine_type(lambda: _ndarray_values, len(self))
    # --------------------------------------------------------------------
    # Array-Like Methods

    # ndarray compat
    def __len__(self) -> int:
        """
        Return the length of the Index.
        """
        return len(self._data)
    def __array__(self, dtype=None) -> np.ndarray:
        """
        The array interface, return my values.
        """
        return np.asarray(self._data, dtype=dtype)
    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
            # Scalars, booleans and >1-dim results are returned raw rather
            # than re-wrapped in an Index.
            return result

        attrs = self._get_attributes_dict()
        return Index(result, **attrs)
    @cache_readonly
    def dtype(self):
        """
        Return the dtype object of the underlying data.
        """
        # Cached via cache_readonly: the underlying data never changes dtype.
        return self._data.dtype
    def ravel(self, order="C"):
        """
        Return an ndarray of the flattened values of the underlying data.

        Parameters
        ----------
        order : {'C', 'F', 'A', 'K'}, default 'C'
            Passed through to ``numpy.ndarray.ravel``.

        Returns
        -------
        numpy.ndarray
            Flattened array.

        See Also
        --------
        numpy.ndarray.ravel
        """
        return self._ndarray_values.ravel(order=order)
    def view(self, cls=None):
        """
        Return a view on self, optionally reinterpreted as ``cls``.

        When ``cls`` is an Index-like type (has ``_typ``) or None, a shallow
        copy sharing the data is returned; otherwise the raw ndarray view.
        """
        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            result = self._data.view(cls)
        else:
            result = self._shallow_copy()
        if isinstance(result, Index):
            # A view shares identity with its parent for `is_` checks.
            result._id = self._id
        return result
    # Docstring template applied to astype via @Appender below.
    _index_shared_docs[
        "astype"
    ] = """
        Create an Index with values cast to dtypes. The class of a new Index
        is determined by dtype. When conversion is impossible, a ValueError
        exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

        Returns
        -------
        Index
            Index with values cast to specified dtype.
        """
697 @Appender(_index_shared_docs["astype"])
698 def astype(self, dtype, copy=True):
699 if is_dtype_equal(self.dtype, dtype):
700 return self.copy() if copy else self
702 elif is_categorical_dtype(dtype):
703 from pandas.core.indexes.category import CategoricalIndex
705 return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy)
707 elif is_extension_array_dtype(dtype):
708 return Index(np.asarray(self), dtype=dtype, copy=copy)
710 try:
711 casted = self.values.astype(dtype, copy=copy)
712 except (TypeError, ValueError):
713 raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}")
714 return Index(casted, name=self.name, dtype=dtype)
    # Docstring template applied to take via @Appender below
    # (%-substituted with _index_doc_kwargs).
    _index_shared_docs[
        "take"
    ] = """
        Return a new %(klass)s of the values selected by the indices.

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        indices : list
            Indices to be taken.
        axis : int, optional
            The axis over which to select values, always 0.
        allow_fill : bool, default True
        fill_value : bool, default None
            If allow_fill=True and fill_value is not None, indices specified by
            -1 is regarded as NA. If Index doesn't hold NA, raise ValueError.

        Returns
        -------
        numpy.ndarray
            Elements of given indices.

        See Also
        --------
        numpy.ndarray.take
        """
    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
        if kwargs:
            # numpy compat: reject any extra keyword arguments.
            nv.validate_take(tuple(), kwargs)
        indices = ensure_platform_int(indices)
        if self._can_hold_na:
            # NA-capable index: -1 indices may be filled with the NA value.
            taken = self._assert_take_fillable(
                self.values,
                indices,
                allow_fill=allow_fill,
                fill_value=fill_value,
                na_value=self._na_value,
            )
        else:
            if allow_fill and fill_value is not None:
                cls_name = type(self).__name__
                raise ValueError(
                    f"Unable to fill values because {cls_name} cannot contain NA"
                )
            taken = self.values.take(indices)
        return self._shallow_copy(taken)
    def _assert_take_fillable(
        self, values, indices, allow_fill=True, fill_value=None, na_value=np.nan
    ):
        """
        Internal method to handle NA filling of take.
        """
        indices = ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        if allow_fill and fill_value is not None:
            if (indices < -1).any():
                # Only -1 may act as the NA marker when filling.
                raise ValueError(
                    "When allow_fill=True and fill_value is not None, "
                    "all indices must be >= -1"
                )
            taken = algos.take(
                values, indices, allow_fill=allow_fill, fill_value=na_value
            )
        else:
            taken = values.take(indices)
        return taken
    # Docstring template applied to repeat via @Appender below
    # (%-substituted with _index_doc_kwargs).
    _index_shared_docs[
        "repeat"
    ] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        repeated_index : %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx
        Index(['a', 'b', 'c'], dtype='object')
        >>> idx.repeat(2)
        Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
        >>> idx.repeat([1, 2, 3])
        Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
        """
    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats, axis=None):
        repeats = ensure_platform_int(repeats)
        # numpy compat: only validates that axis is None.
        nv.validate_repeat(tuple(), dict(axis=axis))
        return self._shallow_copy(self._values.repeat(repeats))
    # --------------------------------------------------------------------
    # Copying Methods

    # Docstring template applied to copy via @Appender below.
    _index_shared_docs[
        "copy"
    ] = """
        Make a copy of this object. Name and dtype sets those attributes on
        the new object.

        Parameters
        ----------
        name : str, optional
        deep : bool, default False
        dtype : numpy dtype or pandas type

        Returns
        -------
        copy : Index

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
        """
    @Appender(_index_shared_docs["copy"])
    def copy(self, name=None, deep=False, dtype=None, **kwargs):
        if deep:
            # Deep copy duplicates the underlying data buffer.
            new_index = self._shallow_copy(self._data.copy())
        else:
            new_index = self._shallow_copy()

        names = kwargs.get("names")
        names = self._validate_names(name=name, names=names, deep=deep)
        new_index = new_index.set_names(names)

        if dtype:
            new_index = new_index.astype(dtype)
        return new_index
    def __copy__(self, **kwargs):
        # Shallow copy protocol: delegate to copy() with its defaults.
        return self.copy(**kwargs)
    def __deepcopy__(self, memo=None):
        """
        Parameters
        ----------
        memo, default None
            Standard signature. Unused
        """
        # deep=True copies the underlying data buffer as well.
        return self.copy(deep=True)
885 # --------------------------------------------------------------------
886 # Rendering Methods
888 def __repr__(self):
889 """
890 Return a string representation for this object.
891 """
892 klass_name = type(self).__name__
893 data = self._format_data()
894 attrs = self._format_attrs()
895 space = self._format_space()
896 attrs_str = [f"{k}={v}" for k, v in attrs]
897 prepr = f",{space}".join(attrs_str)
899 # no data provided, just attributes
900 if data is None:
901 data = ""
903 res = f"{klass_name}({data}{prepr})"
905 return res
    def _format_space(self) -> str:
        """Return the separator placed between attributes in the repr."""

        # using space here controls if the attributes
        # are line separated or not (the default)

        # max_seq_items = get_option('display.max_seq_items')
        # if len(self) > max_seq_items:
        #     space = "\n%s" % (' ' * (len(klass) + 1))
        return " "
    @property
    def _formatter_func(self):
        """
        Return the formatter function.

        The base Index uses ``default_pprint`` from
        ``pandas.io.formats.printing``.
        """
        return default_pprint
    def _format_data(self, name=None):
        """
        Return the formatted data as a unicode string.
        """
        # do we want to justify (only do so for non-objects)
        is_justify = not (
            self.inferred_type in ("string", "unicode")
            or (
                self.inferred_type == "categorical" and is_object_dtype(self.categories)
            )
        )

        return format_object_summary(
            self, self._formatter_func, is_justify=is_justify, name=name
        )
    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).
        """
        return format_object_attrs(self)
    def _mpl_repr(self):
        """Return the underlying values for matplotlib plotting."""
        # how to represent ourselves to matplotlib
        return self.values
    def format(self, name=False, formatter=None, **kwargs):
        """
        Render a string representation of the Index.

        Parameters
        ----------
        name : bool, default False
            Whether to include the (escaped) index name as a header entry.
        formatter : callable, optional
            If given, applied to each element via ``self.map`` instead of
            the default formatting.
        """
        header = []
        if name:
            header.append(
                pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header, **kwargs)
    def _format_with_header(self, header, na_rep="NaN", **kwargs):
        """Format values as strings and prepend the given header rows."""
        values = self.values

        from pandas.io.formats.format import format_array

        if is_categorical_dtype(values.dtype):
            values = np.array(values)

        elif is_object_dtype(values.dtype):
            values = lib.maybe_convert_objects(values, safe=1)

        if is_object_dtype(values.dtype):
            result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]

            # could have nans
            mask = isna(values)
            if mask.any():
                result = np.array(result)
                result[mask] = na_rep
                result = result.tolist()

        else:
            result = _trim_front(format_array(values, None, justify="left"))
        return header + result
    def to_native_types(self, slicer=None, **kwargs):
        """
        Format specified values of `self` and return them.

        Parameters
        ----------
        slicer : int, array-like
            An indexer into `self` that specifies which values
            are used in the formatting process.
        kwargs : dict
            Options for specifying how the values should be formatted.
            These options include the following:

            1) na_rep : str
                The value that serves as a placeholder for NULL values
            2) quoting : bool or None
                Whether or not there are quoted values in `self`
            3) date_format : str
                The format used to represent date-like values.

        Returns
        -------
        numpy.ndarray
            Formatted values.
        """
        values = self
        if slicer is not None:
            values = values[slicer]
        return values._format_native_types(**kwargs)
    def _format_native_types(self, na_rep="", quoting=None, **kwargs):
        """
        Actually format specific types of the index.
        """
        mask = isna(self)
        if not self.is_object() and not quoting:
            # Non-object data without quoting: plain string cast suffices.
            values = np.asarray(self).astype(str)
        else:
            values = np.array(self, dtype=object, copy=True)

        # Replace missing entries with the requested NA representation.
        values[mask] = na_rep
        return values
1037 def _summary(self, name=None):
1038 """
1039 Return a summarized representation.
1041 Parameters
1042 ----------
1043 name : str
1044 name to use in the summary representation
1046 Returns
1047 -------
1048 String with a summarized representation of the index
1049 """
1050 if len(self) > 0:
1051 head = self[0]
1052 if hasattr(head, "format") and not isinstance(head, str):
1053 head = head.format()
1054 tail = self[-1]
1055 if hasattr(tail, "format") and not isinstance(tail, str):
1056 tail = tail.format()
1057 index_summary = f", {head} to {tail}"
1058 else:
1059 index_summary = ""
1061 if name is None:
1062 name = type(self).__name__
1063 return f"{name}: {len(self)} entries{index_summary}"
    # --------------------------------------------------------------------
    # Conversion Methods

    def to_flat_index(self):
        """
        Identity method.

        .. versionadded:: 0.24.0

        This is implemented for compatibility with subclass implementations
        when chaining.

        Returns
        -------
        pd.Index
            Caller.

        See Also
        --------
        MultiIndex.to_flat_index : Subclass implementation.
        """
        # Base class: already flat, nothing to do.
        return self
    def to_series(self, index=None, name=None):
        """
        Create a Series with both index and values equal to the index keys.

        Useful with map for returning an indexer based on an index.

        Parameters
        ----------
        index : Index, optional
            Index of resulting Series. If None, defaults to original index.
        name : str, optional
            Name of resulting Series. If None, defaults to name of original
            index.

        Returns
        -------
        Series
            The dtype will be based on the type of the Index values.
        """
        # local import to avoid a circular dependency at module load time
        from pandas import Series

        if index is None:
            index = self._shallow_copy()
        if name is None:
            name = self.name

        # copy the values so mutating the Series cannot alter this Index
        return Series(self.values.copy(), index=index, name=name)
1117 def to_frame(self, index=True, name=None):
1118 """
1119 Create a DataFrame with a column containing the Index.
1121 .. versionadded:: 0.24.0
1123 Parameters
1124 ----------
1125 index : bool, default True
1126 Set the index of the returned DataFrame as the original Index.
1128 name : object, default None
1129 The passed name should substitute for the index name (if it has
1130 one).
1132 Returns
1133 -------
1134 DataFrame
1135 DataFrame containing the original Index data.
1137 See Also
1138 --------
1139 Index.to_series : Convert an Index to a Series.
1140 Series.to_frame : Convert Series to DataFrame.
1142 Examples
1143 --------
1144 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
1145 >>> idx.to_frame()
1146 animal
1147 animal
1148 Ant Ant
1149 Bear Bear
1150 Cow Cow
1152 By default, the original Index is reused. To enforce a new Index:
1154 >>> idx.to_frame(index=False)
1155 animal
1156 0 Ant
1157 1 Bear
1158 2 Cow
1160 To override the name of the resulting column, specify `name`:
1162 >>> idx.to_frame(index=False, name='zoo')
1163 zoo
1164 0 Ant
1165 1 Bear
1166 2 Cow
1167 """
1169 from pandas import DataFrame
1171 if name is None:
1172 name = self.name or 0
1173 result = DataFrame({name: self._values.copy()})
1175 if index:
1176 result.index = self
1177 return result
1179 # --------------------------------------------------------------------
1180 # Name-Centric Methods
    @property
    def name(self):
        # stored on ``_name``; ``name`` is a property so the setter can veto
        # assignment (used by MultiIndex levels, see the setter below)
        return self._name
    @name.setter
    def name(self, value):
        if self._no_setting_name:
            # Used in MultiIndex.levels to avoid silently ignoring name updates.
            raise RuntimeError(
                "Cannot set name on a level of a MultiIndex. Use "
                "'MultiIndex.set_names' instead."
            )
        # validates the new name (e.g. hashability); return value unused
        maybe_extract_name(value, None, type(self))
        self._name = value
1197 def _validate_names(self, name=None, names=None, deep=False):
1198 """
1199 Handles the quirks of having a singular 'name' parameter for general
1200 Index and plural 'names' parameter for MultiIndex.
1201 """
1202 from copy import deepcopy
1204 if names is not None and name is not None:
1205 raise TypeError("Can only provide one of `names` and `name`")
1206 elif names is None and name is None:
1207 return deepcopy(self.names) if deep else self.names
1208 elif names is not None:
1209 if not is_list_like(names):
1210 raise TypeError("Must pass list-like as `names`.")
1211 return names
1212 else:
1213 if not is_list_like(name):
1214 return [name]
1215 return name
    def _get_names(self):
        # a flat Index always has exactly one level, hence a 1-tuple;
        # FrozenList keeps the result immutable like MultiIndex.names
        return FrozenList((self.name,))
1220 def _set_names(self, values, level=None):
1221 """
1222 Set new names on index. Each name has to be a hashable type.
1224 Parameters
1225 ----------
1226 values : str or sequence
1227 name(s) to set
1228 level : int, level name, or sequence of int/level names (default None)
1229 If the index is a MultiIndex (hierarchical), level(s) to set (None
1230 for all levels). Otherwise level must be None
1232 Raises
1233 ------
1234 TypeError if each name is not hashable.
1235 """
1236 if not is_list_like(values):
1237 raise ValueError("Names must be a list-like")
1238 if len(values) != 1:
1239 raise ValueError(f"Length of new names must be 1, got {len(values)}")
1241 # GH 20527
1242 # All items in 'name' need to be hashable:
1243 for name in values:
1244 if not is_hashable(name):
1245 raise TypeError(f"{type(self).__name__}.name must be a hashable type")
1246 self._name = values[0]
1248 names = property(fset=_set_names, fget=_get_names)
    def set_names(self, names, level=None, inplace=False):
        """
        Set Index or MultiIndex name.

        Able to set new names partially and by level.

        Parameters
        ----------
        names : label or list of label
            Name(s) to set.
        level : int, label or list of int or label, optional
            If the index is a MultiIndex, level(s) to set (None for all
            levels). Otherwise level must be None.
        inplace : bool, default False
            Modifies the object directly, instead of creating a new Index or
            MultiIndex.

        Returns
        -------
        Index
            The same type as the caller or None if inplace is True.

        See Also
        --------
        Index.rename : Able to set new names without level.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx
        Int64Index([1, 2, 3, 4], dtype='int64')
        >>> idx.set_names('quarter')
        Int64Index([1, 2, 3, 4], dtype='int64', name='quarter')

        >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
        ...                                   [2018, 2019]])
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   )
        >>> idx.set_names(['kind', 'year'], inplace=True)
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['kind', 'year'])
        >>> idx.set_names('species', level=0)
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['species', 'year'])
        """
        if level is not None and not isinstance(self, ABCMultiIndex):
            raise ValueError("Level must be None for non-MultiIndex")

        if level is not None and not is_list_like(level) and is_list_like(names):
            raise TypeError("Names must be a string when a single level is provided.")

        if not is_list_like(names) and level is None and self.nlevels > 1:
            raise TypeError("Must pass list-like as `names`.")

        # normalize scalars to one-element lists for _set_names
        if not is_list_like(names):
            names = [names]
        if level is not None and not is_list_like(level):
            level = [level]

        # mutate self when inplace, otherwise work on a shallow copy
        if inplace:
            idx = self
        else:
            idx = self._shallow_copy()
        idx._set_names(names, level=level)
        if not inplace:
            return idx
    def rename(self, name, inplace=False):
        """
        Alter Index or MultiIndex name.

        Able to set new names without level. Defaults to returning new index.
        Length of names must match number of levels in MultiIndex.

        Parameters
        ----------
        name : label or list of labels
            Name(s) to set.
        inplace : bool, default False
            Modifies the object directly, instead of creating a new Index or
            MultiIndex.

        Returns
        -------
        Index
            The same type as the caller or None if inplace is True.

        See Also
        --------
        Index.set_names : Able to set new names partially and by level.

        Examples
        --------
        >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
        >>> idx.rename('grade')
        Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')

        >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
        ...                                   [2018, 2019]],
        ...                                   names=['kind', 'year'])
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['kind', 'year'])
        >>> idx.rename(['species', 'year'])
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['species', 'year'])
        >>> idx.rename('species')
        Traceback (most recent call last):
        TypeError: Must pass list-like as `names`.
        """
        # thin wrapper: set_names does validation and the inplace handling
        return self.set_names([name], inplace=inplace)
1380 # --------------------------------------------------------------------
1381 # Level-Centric Methods
    @property
    def nlevels(self) -> int:
        """
        Number of levels.
        """
        # a flat Index is always single-level; MultiIndex overrides this
        return 1
1390 def _sort_levels_monotonic(self):
1391 """
1392 Compat with MultiIndex.
1393 """
1394 return self
1396 def _validate_index_level(self, level):
1397 """
1398 Validate index level.
1400 For single-level Index getting level number is a no-op, but some
1401 verification must be done like in MultiIndex.
1403 """
1404 if isinstance(level, int):
1405 if level < 0 and level != -1:
1406 raise IndexError(
1407 "Too many levels: Index has only 1 level, "
1408 f"{level} is not a valid level number"
1409 )
1410 elif level > 0:
1411 raise IndexError(
1412 f"Too many levels: Index has only 1 level, not {level + 1}"
1413 )
1414 elif level != self.name:
1415 raise KeyError(
1416 f"Requested level ({level}) does not match index name ({self.name})"
1417 )
1419 def _get_level_number(self, level):
1420 self._validate_index_level(level)
1421 return 0
    def sortlevel(self, level=None, ascending=True, sort_remaining=None):
        """
        For internal compatibility with the Index API.

        Sort the Index. This is for compat with MultiIndex

        Parameters
        ----------
        ascending : bool, default True
            False to sort in descending order

        level, sort_remaining are compat parameters

        Returns
        -------
        Index
        """
        # level/sort_remaining are accepted but unused on a flat Index
        return self.sort_values(return_indexer=True, ascending=ascending)
1442 def _get_level_values(self, level):
1443 """
1444 Return an Index of values for requested level.
1446 This is primarily useful to get an individual level of values from a
1447 MultiIndex, but is provided on Index as well for compatibility.
1449 Parameters
1450 ----------
1451 level : int or str
1452 It is either the integer position or the name of the level.
1454 Returns
1455 -------
1456 Index
1457 Calling object, as there is only one level in the Index.
1459 See Also
1460 --------
1461 MultiIndex.get_level_values : Get values for a level of a MultiIndex.
1463 Notes
1464 -----
1465 For Index, level should be 0, since there are no multiple levels.
1467 Examples
1468 --------
1470 >>> idx = pd.Index(list('abc'))
1471 >>> idx
1472 Index(['a', 'b', 'c'], dtype='object')
1474 Get level values by supplying `level` as integer:
1476 >>> idx.get_level_values(0)
1477 Index(['a', 'b', 'c'], dtype='object')
1478 """
1479 self._validate_index_level(level)
1480 return self
1482 get_level_values = _get_level_values
    def droplevel(self, level=0):
        """
        Return index with requested level(s) removed.

        If resulting index has only 1 level left, the result will be
        of Index type, not MultiIndex.

        .. versionadded:: 0.23.1 (support for non-MultiIndex)

        Parameters
        ----------
        level : int, str, or list-like, default 0
            If a string is given, must be the name of a level
            If list-like, elements must be names or indexes of levels.

        Returns
        -------
        Index or MultiIndex
        """
        if not isinstance(level, (tuple, list)):
            level = [level]

        # drop highest positions first so earlier pops don't shift the
        # remaining level numbers
        levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]

        if len(level) == 0:
            return self
        if len(level) >= self.nlevels:
            raise ValueError(
                f"Cannot remove {len(level)} levels from an index with {self.nlevels} "
                "levels: at least one level must be left."
            )
        # The two checks above guarantee that here self is a MultiIndex

        new_levels = list(self.levels)
        new_codes = list(self.codes)
        new_names = list(self.names)

        for i in levnums:
            new_levels.pop(i)
            new_codes.pop(i)
            new_names.pop(i)

        if len(new_levels) == 1:

            # set nan if needed
            mask = new_codes[0] == -1
            result = new_levels[0].take(new_codes[0])
            if mask.any():
                result = result.putmask(mask, np.nan)

            result._name = new_names[0]
            return result
        else:
            # local import avoids a circular dependency with multi.py
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=new_names,
                verify_integrity=False,
            )
    # shared docstring, spliced into implementations via @Appender
    _index_shared_docs[
        "_get_grouper_for_level"
    ] = """
        Get index grouper corresponding to an index level

        Parameters
        ----------
        mapper: Group mapping function or None
            Function mapping index values to groups
        level : int or None
            Index level

        Returns
        -------
        grouper : Index
            Index of values to group on.
        labels : ndarray of int or None
            Array of locations in level_index.
        uniques : Index or None
            Index of unique values for level.
        """
    @Appender(_index_shared_docs["_get_grouper_for_level"])
    def _get_grouper_for_level(self, mapper, level=None):
        # a flat Index has only level 0
        assert level is None or level == 0
        if mapper is None:
            grouper = self
        else:
            grouper = self.map(mapper)

        # labels/uniques are only meaningful for MultiIndex levels
        return grouper, None, None
1578 # --------------------------------------------------------------------
1579 # Introspection Methods
    @property
    def is_monotonic(self) -> bool:
        """
        Alias for is_monotonic_increasing.
        """
        return self.is_monotonic_increasing
    @property
    def is_monotonic_increasing(self):
        """
        Return if the index is monotonic increasing (only equal or
        increasing) values.

        Examples
        --------
        >>> Index([1, 2, 3]).is_monotonic_increasing
        True
        >>> Index([1, 2, 2]).is_monotonic_increasing
        True
        >>> Index([1, 3, 2]).is_monotonic_increasing
        False
        """
        # delegated to the C engine backing this index
        return self._engine.is_monotonic_increasing
    @property
    def is_monotonic_decreasing(self) -> bool:
        """
        Return if the index is monotonic decreasing (only equal or
        decreasing) values.

        Examples
        --------
        >>> Index([3, 2, 1]).is_monotonic_decreasing
        True
        >>> Index([3, 2, 2]).is_monotonic_decreasing
        True
        >>> Index([3, 1, 2]).is_monotonic_decreasing
        False
        """
        # delegated to the C engine backing this index
        return self._engine.is_monotonic_decreasing
    @property
    def _is_strictly_monotonic_increasing(self) -> bool:
        """
        Return if the index is strictly monotonic increasing
        (only increasing) values.

        Examples
        --------
        >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
        True
        >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
        False
        >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
        False
        """
        # strict = non-decreasing with no repeats
        return self.is_unique and self.is_monotonic_increasing
    @property
    def _is_strictly_monotonic_decreasing(self) -> bool:
        """
        Return if the index is strictly monotonic decreasing
        (only decreasing) values.

        Examples
        --------
        >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
        True
        >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
        False
        >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
        False
        """
        # strict = non-increasing with no repeats
        return self.is_unique and self.is_monotonic_decreasing
    @cache_readonly
    def is_unique(self) -> bool:
        """
        Return if the index has unique values.
        """
        # delegated to the C engine; cached after the first computation
        return self._engine.is_unique
    @property
    def has_duplicates(self) -> bool:
        # inverse of the (cached) is_unique check
        return not self.is_unique
1667 def is_boolean(self) -> bool:
1668 return self.inferred_type in ["boolean"]
1670 def is_integer(self) -> bool:
1671 return self.inferred_type in ["integer"]
1673 def is_floating(self) -> bool:
1674 return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]
1676 def is_numeric(self) -> bool:
1677 return self.inferred_type in ["integer", "floating"]
    def is_object(self) -> bool:
        # dtype-based check (not inference-based like the other is_* methods)
        return is_object_dtype(self.dtype)
1682 def is_categorical(self) -> bool:
1683 """
1684 Check if the Index holds categorical data.
1686 Returns
1687 -------
1688 boolean
1689 True if the Index is categorical.
1691 See Also
1692 --------
1693 CategoricalIndex : Index for categorical data.
1695 Examples
1696 --------
1697 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
1698 ... "Watermelon"]).astype("category")
1699 >>> idx.is_categorical()
1700 True
1702 >>> idx = pd.Index([1, 3, 5, 7])
1703 >>> idx.is_categorical()
1704 False
1706 >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
1707 >>> s
1708 0 Peter
1709 1 Victor
1710 2 Elisabeth
1711 3 Mar
1712 dtype: object
1713 >>> s.index.is_categorical()
1714 False
1715 """
1716 return self.inferred_type in ["categorical"]
1718 def is_interval(self) -> bool:
1719 return self.inferred_type in ["interval"]
1721 def is_mixed(self) -> bool:
1722 return self.inferred_type in ["mixed"]
1724 def holds_integer(self):
1725 """
1726 Whether the type is an integer type.
1727 """
1728 return self.inferred_type in ["integer", "mixed-integer"]
    @cache_readonly
    def inferred_type(self):
        """
        Return a string of the type inferred from the values.
        """
        # cached: inference scans the values once
        return lib.infer_dtype(self, skipna=False)
    @cache_readonly
    def is_all_dates(self) -> bool:
        # True when every entry is datetime-like per is_datetime_array
        return is_datetime_array(ensure_object(self.values))
1741 # --------------------------------------------------------------------
1742 # Pickle Methods
    def __reduce__(self):
        # pickle as (reconstructor, (class, attrs)): _new_Index rebuilds the
        # index from its data plus the attributes dict
        d = dict(data=self._data)
        d.update(self._get_attributes_dict())
        return _new_Index, (type(self), d), None
1749 # --------------------------------------------------------------------
1750 # Null Handling Methods
    # sentinel used when introducing missing values into this index type;
    # subclasses may override it (e.g. with a datetime-style NA)
    _na_value = np.nan
    """The expected NA value to use with this index."""
    @cache_readonly
    def _isnan(self):
        """
        Return if each value is NaN.
        """
        if self._can_hold_na:
            return isna(self)
        else:
            # shouldn't reach to this condition by checking hasnans beforehand
            values = np.empty(len(self), dtype=np.bool_)
            values.fill(False)
            return values
    @cache_readonly
    def _nan_idxs(self):
        # integer positions of NaN entries; empty when NA cannot be held
        if self._can_hold_na:
            return self._isnan.nonzero()[0]
        else:
            return np.array([], dtype=np.int64)
    @cache_readonly
    def hasnans(self):
        """
        Return if I have any nans; enables various perf speedups.
        """
        if self._can_hold_na:
            # any() over the cached boolean NaN mask
            return bool(self._isnan.any())
        else:
            return False
    def isna(self):
        """
        Detect missing values.

        Return a boolean same-sized object indicating if the values are NA.
        NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
        mapped to ``True`` values.
        Everything else get mapped to ``False`` values. Characters such as
        empty strings `''` or :attr:`numpy.inf` are not considered NA values
        (unless you set ``pandas.options.mode.use_inf_as_na = True``).

        Returns
        -------
        numpy.ndarray
            A boolean array of whether my values are NA.

        See Also
        --------
        Index.notna : Boolean inverse of isna.
        Index.dropna : Omit entries with missing values.
        isna : Top-level isna.
        Series.isna : Detect missing values in Series object.

        Examples
        --------
        Show which entries in a pandas.Index are NA. The result is an
        array.

        >>> idx = pd.Index([5.2, 6.0, np.NaN])
        >>> idx
        Float64Index([5.2, 6.0, nan], dtype='float64')
        >>> idx.isna()
        array([False, False, True], dtype=bool)

        Empty strings are not considered NA values. None is considered an NA
        value.

        >>> idx = pd.Index(['black', '', 'red', None])
        >>> idx
        Index(['black', '', 'red', None], dtype='object')
        >>> idx.isna()
        array([False, False, False, True], dtype=bool)

        For datetimes, `NaT` (Not a Time) is considered as an NA value.

        >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
        ...                         pd.Timestamp(''), None, pd.NaT])
        >>> idx
        DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
                      dtype='datetime64[ns]', freq=None)
        >>> idx.isna()
        array([False, True, True, True], dtype=bool)
        """
        # the mask is computed once and cached on _isnan
        return self._isnan
1840 isnull = isna
    def notna(self):
        """
        Detect existing (non-missing) values.

        Return a boolean same-sized object indicating if the values are not NA.
        Non-missing values get mapped to ``True``. Characters such as empty
        strings ``''`` or :attr:`numpy.inf` are not considered NA values
        (unless you set ``pandas.options.mode.use_inf_as_na = True``).
        NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
        values.

        Returns
        -------
        numpy.ndarray
            Boolean array to indicate which entries are not NA.

        See Also
        --------
        Index.notnull : Alias of notna.
        Index.isna: Inverse of notna.
        notna : Top-level notna.

        Examples
        --------
        Show which entries in an Index are not NA. The result is an
        array.

        >>> idx = pd.Index([5.2, 6.0, np.NaN])
        >>> idx
        Float64Index([5.2, 6.0, nan], dtype='float64')
        >>> idx.notna()
        array([ True,  True, False])

        Empty strings are not considered NA values. None is considered a NA
        value.

        >>> idx = pd.Index(['black', '', 'red', None])
        >>> idx
        Index(['black', '', 'red', None], dtype='object')
        >>> idx.notna()
        array([ True,  True,  True, False])
        """
        # elementwise negation of the cached NA mask
        return ~self.isna()
1886 notnull = notna
    # shared docstring, spliced into implementations via @Appender
    _index_shared_docs[
        "fillna"
    ] = """
        Fill NA/NaN values with the specified value.

        Parameters
        ----------
        value : scalar
            Scalar value to use to fill holes (e.g. 0).
            This value cannot be a list-likes.
        downcast : dict, default is None
            a dict of item->dtype of what to downcast if possible,
            or the string 'infer' which will try to downcast to an appropriate
            equal type (e.g. float64 to int64 if possible).

        Returns
        -------
        filled : Index
        """
    @Appender(_index_shared_docs["fillna"])
    def fillna(self, value=None, downcast=None):
        # rejects list-like fill values
        self._assert_can_do_op(value)
        if self.hasnans:
            result = self.putmask(self._isnan, value)
            if downcast is None:
                # no need to care metadata other than name
                # because it can't have freq if
                return Index(result, name=self.name)
        # NOTE(review): when downcast is not None the filled ``result`` is
        # discarded and a copy of self is returned — confirm this is intended
        return self._shallow_copy()
    # shared docstring, spliced into implementations via @Appender
    _index_shared_docs[
        "dropna"
    ] = """
        Return Index without NA/NaN values.

        Parameters
        ----------
        how : {'any', 'all'}, default 'any'
            If the Index is a MultiIndex, drop the value when any or all levels
            are NaN.

        Returns
        -------
        valid : Index
        """
    @Appender(_index_shared_docs["dropna"])
    def dropna(self, how="any"):
        # ``how`` only distinguishes behavior on MultiIndex; still validated
        if how not in ("any", "all"):
            raise ValueError(f"invalid how option: {how}")

        if self.hasnans:
            # keep only the positions where the cached NA mask is False
            return self._shallow_copy(self._values[~self._isnan])
        return self._shallow_copy()
1944 # --------------------------------------------------------------------
1945 # Uniqueness Methods
    # shared docstring (with %-placeholders), spliced in via @Appender
    _index_shared_docs[
        "index_unique"
    ] = """
        Return unique values in the index. Uniques are returned in order
        of appearance, this does NOT sort.

        Parameters
        ----------
        level : int or str, optional, default None
            Only return values from specified level (for MultiIndex).

            .. versionadded:: 0.23.0

        Returns
        -------
        Index without duplicates

        See Also
        --------
        unique
        Series.unique
        """
    @Appender(_index_shared_docs["index_unique"] % _index_doc_kwargs)
    def unique(self, level=None):
        # ``level`` is accepted for MultiIndex compatibility only
        if level is not None:
            self._validate_index_level(level)
        result = super().unique()
        # re-wrap the deduplicated values as an index of the caller's type
        return self._shallow_copy(result)
    def drop_duplicates(self, keep="first"):
        """
        Return Index with duplicate values removed.

        Parameters
        ----------
        keep : {'first', 'last', ``False``}, default 'first'
            - 'first' : Drop duplicates except for the first occurrence.
            - 'last' : Drop duplicates except for the last occurrence.
            - ``False`` : Drop all duplicates.

        Returns
        -------
        deduplicated : Index

        See Also
        --------
        Series.drop_duplicates : Equivalent method on Series.
        DataFrame.drop_duplicates : Equivalent method on DataFrame.
        Index.duplicated : Related method on Index, indicating duplicate
            Index values.

        Examples
        --------
        Generate an pandas.Index with duplicate values.

        >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])

        The `keep` parameter controls  which duplicate values are removed.
        The value 'first' keeps the first occurrence for each
        set of duplicated entries. The default value of keep is 'first'.

        >>> idx.drop_duplicates(keep='first')
        Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')

        The value 'last' keeps the last occurrence for each set of duplicated
        entries.

        >>> idx.drop_duplicates(keep='last')
        Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')

        The value ``False`` discards all sets of duplicated entries.

        >>> idx.drop_duplicates(keep=False)
        Index(['cow', 'beetle', 'hippo'], dtype='object')
        """
        # implementation lives on the shared IndexOpsMixin base
        return super().drop_duplicates(keep=keep)
    def duplicated(self, keep="first"):
        """
        Indicate duplicate index values.

        Duplicated values are indicated as ``True`` values in the resulting
        array. Either all duplicates, all except the first, or all except the
        last occurrence of duplicates can be indicated.

        Parameters
        ----------
        keep : {'first', 'last', False}, default 'first'
            The value or values in a set of duplicates to mark as missing.

            - 'first' : Mark duplicates as ``True`` except for the first
              occurrence.
            - 'last' : Mark duplicates as ``True`` except for the last
              occurrence.
            - ``False`` : Mark all duplicates as ``True``.

        Returns
        -------
        numpy.ndarray

        See Also
        --------
        Series.duplicated : Equivalent method on pandas.Series.
        DataFrame.duplicated : Equivalent method on pandas.DataFrame.
        Index.drop_duplicates : Remove duplicate values from Index.

        Examples
        --------
        By default, for each set of duplicated values, the first occurrence is
        set to False and all others to True:

        >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
        >>> idx.duplicated()
        array([False, False,  True, False,  True])

        which is equivalent to

        >>> idx.duplicated(keep='first')
        array([False, False,  True, False,  True])

        By using 'last', the last occurrence of each set of duplicated values
        is set on False and all others on True:

        >>> idx.duplicated(keep='last')
        array([ True, False,  True, False, False])

        By setting keep on ``False``, all duplicates are True:

        >>> idx.duplicated(keep=False)
        array([ True, False,  True, False,  True])
        """
        # implementation lives on the shared IndexOpsMixin base
        return super().duplicated(keep=keep)
2081 def _get_unique_index(self, dropna=False):
2082 """
2083 Returns an index containing unique values.
2085 Parameters
2086 ----------
2087 dropna : bool
2088 If True, NaN values are dropped.
2090 Returns
2091 -------
2092 uniques : index
2093 """
2094 if self.is_unique and not dropna:
2095 return self
2097 values = self.values
2099 if not self.is_unique:
2100 values = self.unique()
2102 if dropna:
2103 try:
2104 if self.hasnans:
2105 values = values[~isna(values)]
2106 except NotImplementedError:
2107 pass
2109 return self._shallow_copy(values)
2111 # --------------------------------------------------------------------
2112 # Arithmetic & Logical Methods
2114 def __add__(self, other):
2115 if isinstance(other, (ABCSeries, ABCDataFrame)):
2116 return NotImplemented
2117 from pandas import Series
2119 return Index(Series(self) + other)
2121 def __radd__(self, other):
2122 from pandas import Series
2124 return Index(other + Series(self))
    def __iadd__(self, other):
        # alias for __add__
        # Index is immutable, so ``+=`` rebinds to a new Index; using the
        # ``+`` operator (not self.__add__) preserves the reflected-op path
        return self + other
2130 def __sub__(self, other):
2131 return Index(np.array(self) - other)
2133 def __rsub__(self, other):
2134 # wrap Series to ensure we pin name correctly
2135 from pandas import Series
2137 return Index(other - Series(self))
    def __and__(self, other):
        # set-style &: intersection, not elementwise boolean AND
        return self.intersection(other)
    def __or__(self, other):
        # set-style |: union, not elementwise boolean OR
        return self.union(other)
    def __xor__(self, other):
        # set-style ^: symmetric difference
        return self.symmetric_difference(other)
2148 def __nonzero__(self):
2149 raise ValueError(
2150 f"The truth value of a {type(self).__name__} is ambiguous. "
2151 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
2152 )
2154 __bool__ = __nonzero__
2156 # --------------------------------------------------------------------
2157 # Set Operation Methods
    def _get_reconciled_name_object(self, other):
        """
        If the result of a set operation will be self,
        return self, unless the name changes, in which
        case make a shallow copy of self.
        """
        name = get_op_result_name(self, other)
        if self.name != name:
            # only copy when the reconciled name actually differs
            return self._shallow_copy(name=name)
        return self
2170 def _union_incompatible_dtypes(self, other, sort):
2171 """
2172 Casts this and other index to object dtype to allow the formation
2173 of a union between incompatible types.
2175 Parameters
2176 ----------
2177 other : Index or array-like
2178 sort : False or None, default False
2179 Whether to sort the resulting index.
2181 * False : do not sort the result.
2182 * None : sort the result, except when `self` and `other` are equal
2183 or when the values cannot be compared.
2185 Returns
2186 -------
2187 Index
2188 """
2189 this = self.astype(object, copy=False)
2190 # cast to Index for when `other` is list-like
2191 other = Index(other).astype(object, copy=False)
2192 return Index.union(this, other, sort=sort).astype(object, copy=False)
2194 def _is_compatible_with_other(self, other):
2195 """
2196 Check whether this and the other dtype are compatible with each other.
2197 Meaning a union can be formed between them without needing to be cast
2198 to dtype object.
2200 Parameters
2201 ----------
2202 other : Index or array-like
2204 Returns
2205 -------
2206 bool
2207 """
2208 return type(self) is type(other) and is_dtype_equal(self.dtype, other.dtype)
2210 def _validate_sort_keyword(self, sort):
2211 if sort not in [None, False]:
2212 raise ValueError(
2213 "The 'sort' keyword only takes the values of "
2214 f"None or False; {sort} was passed."
2215 )
    def union(self, other, sort=None):
        """
        Form the union of two Index objects.

        If the Index objects are incompatible, both Index objects will be
        cast to dtype('object') first.

            .. versionchanged:: 0.25.0

        Parameters
        ----------
        other : Index or array-like
        sort : bool or None, default None
            Whether to sort the resulting Index.

            * None : Sort the result, except when

              1. `self` and `other` are equal.
              2. `self` or `other` has length 0.
              3. Some values in `self` or `other` cannot be compared.
                 A RuntimeWarning is issued in this case.

            * False : do not sort the result.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default value from ``True`` to ``None``
               (without change in behaviour).

        Returns
        -------
        union : Index

        Examples
        --------
        Union matching dtypes

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.union(idx2)
        Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')

        Union mismatched dtypes

        >>> idx1 = pd.Index(['a', 'b', 'c', 'd'])
        >>> idx2 = pd.Index([1, 2, 3, 4])
        >>> idx1.union(idx2)
        Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)

        # mismatched dtypes are unioned as object; matching dtypes use the
        # subclass-specific _union implementation
        if not self._is_compatible_with_other(other):
            return self._union_incompatible_dtypes(other, sort=sort)

        return self._union(other, sort=sort)
    def _union(self, other, sort):
        """
        Specific union logic should go here. In subclasses, union behavior
        should be overwritten here rather than in `self.union`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        Index
        """
        # trivial cases: empty other, or identical indexes
        if not len(other) or self.equals(other):
            return self._get_reconciled_name_object(other)

        if not len(self):
            return other._get_reconciled_name_object(self)

        # TODO(EA): setops-refactor, clean all this up
        # tz-aware datetimes use their underlying ndarray values for the setop
        if is_datetime64tz_dtype(self):
            lvals = self._ndarray_values
        else:
            lvals = self._values
        if is_datetime64tz_dtype(other):
            rvals = other._ndarray_values
        else:
            rvals = other._values

        if sort is None and self.is_monotonic and other.is_monotonic:
            # both sides sorted: a linear merge yields a sorted union
            try:
                result = self._outer_indexer(lvals, rvals)[0]
            except TypeError:
                # incomparable objects
                result = list(lvals)

                # worth making this faster? a very unusual case
                value_set = set(lvals)
                result.extend([x for x in rvals if x not in value_set])
        else:
            # find indexes of things in "other" that are not in "self"
            if self.is_unique:
                indexer = self.get_indexer(other)
                indexer = (indexer == -1).nonzero()[0]
            else:
                indexer = algos.unique1d(self.get_indexer_non_unique(other)[1])

            if len(indexer) > 0:
                other_diff = algos.take_nd(rvals, indexer, allow_fill=False)
                result = concat_compat((lvals, other_diff))

            else:
                result = lvals

            if sort is None:
                try:
                    result = algos.safe_sort(result)
                except TypeError as err:
                    # best-effort: leave unsorted when values are incomparable
                    warnings.warn(
                        f"{err}, sort order is undefined for incomparable objects",
                        RuntimeWarning,
                        stacklevel=3,
                    )

        # for subclasses
        return self._wrap_setop_result(other, result)
    def _wrap_setop_result(self, other, result):
        """
        Wrap the raw values produced by a set operation in an Index,
        keeping the name only when ``self`` and ``other`` agree on it.
        """
        return self._constructor(result, name=get_op_result_name(self, other))
    # Shared docstring, attached to ``intersection`` below via @Appender.
    _index_shared_docs[
        "intersection"
    ] = """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and `other`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default from ``True`` to ``False``, to match
               the behaviour of 0.23.4 and earlier.

        Returns
        -------
        intersection : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Int64Index([3, 4], dtype='int64')
        """
    # TODO: standardize return type of non-union setops type(self vs other)
    @Appender(_index_shared_docs["intersection"])
    def intersection(self, other, sort=False):
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other = ensure_index(other)

        if self.equals(other):
            return self._get_reconciled_name_object(other)

        # Mismatched dtypes: intersect on object dtype instead.
        if not is_dtype_equal(self.dtype, other.dtype):
            this = self.astype("O")
            other = other.astype("O")
            return this.intersection(other, sort=sort)

        # TODO(EA): setops-refactor, clean all this up
        lvals = self._values
        rvals = other._values

        if self.is_monotonic and other.is_monotonic:
            # Fast path: both sides sorted, merge via the inner indexer.
            try:
                result = self._inner_indexer(lvals, rvals)[0]
                return self._wrap_setop_result(other, result)
            except TypeError:
                pass

        try:
            indexer = Index(rvals).get_indexer(lvals)
            indexer = indexer.take((indexer != -1).nonzero()[0])
        except (InvalidIndexError, IncompatibleFrequency):
            # InvalidIndexError raised by get_indexer if non-unique
            # IncompatibleFrequency raised by PeriodIndex.get_indexer
            indexer = algos.unique1d(Index(rvals).get_indexer_non_unique(lvals)[0])
            indexer = indexer[indexer != -1]

        taken = other.take(indexer)
        res_name = get_op_result_name(self, other)

        if sort is None:
            taken = algos.safe_sort(taken.values)
            return self._shallow_copy(taken, name=res_name)

        taken.name = res_name
        return taken
    def difference(self, other, sort=None):
        """
        Return a new Index with elements from the index that are not in
        `other`.

        This is the set difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default value from ``True`` to ``None``
               (without change in behaviour).

        Returns
        -------
        difference : Index

        Examples
        --------

        >>> idx1 = pd.Index([2, 1, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')
        >>> idx1.difference(idx2, sort=False)
        Int64Index([2, 1], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)

        if self.equals(other):
            # pass an empty np.ndarray with the appropriate dtype
            return self._shallow_copy(self._data[:0])

        other, result_name = self._convert_can_do_setop(other)

        # Deduplicate self first so the positional bookkeeping below is simple.
        this = self._get_unique_index()

        indexer = this.get_indexer(other)
        indexer = indexer.take((indexer != -1).nonzero()[0])

        # Positions in `this` that matched nothing in `other`.
        label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
        the_diff = this.values.take(label_diff)
        if sort is None:
            try:
                the_diff = algos.safe_sort(the_diff)
            except TypeError:
                # Incomparable elements: leave unsorted.
                pass

        return this._shallow_copy(the_diff, name=result_name)
    def symmetric_difference(self, other, result_name=None, sort=None):
        """
        Compute the symmetric difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        result_name : str
        sort : False or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default value from ``True`` to ``None``
               (without change in behaviour).

        Returns
        -------
        symmetric_difference : Index

        Notes
        -----
        ``symmetric_difference`` contains elements that appear in either
        ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
        ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
        dropped.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([2, 3, 4, 5])
        >>> idx1.symmetric_difference(idx2)
        Int64Index([1, 5], dtype='int64')

        You can also use the ``^`` operator:

        >>> idx1 ^ idx2
        Int64Index([1, 5], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name_update = self._convert_can_do_setop(other)
        # An explicit result_name argument wins over the derived one.
        if result_name is None:
            result_name = result_name_update

        # Work on deduplicated values; duplicates are dropped by definition.
        this = self._get_unique_index()
        other = other._get_unique_index()
        indexer = this.get_indexer(other)

        # {this} minus {other}
        common_indexer = indexer.take((indexer != -1).nonzero()[0])
        left_indexer = np.setdiff1d(
            np.arange(this.size), common_indexer, assume_unique=True
        )
        left_diff = this._values.take(left_indexer)

        # {other} minus {this}
        right_indexer = (indexer == -1).nonzero()[0]
        right_diff = other._values.take(right_indexer)

        the_diff = concat_compat([left_diff, right_diff])
        if sort is None:
            try:
                the_diff = algos.safe_sort(the_diff)
            except TypeError:
                pass

        attribs = self._get_attributes_dict()
        attribs["name"] = result_name
        if "freq" in attribs:
            # Any frequency information is meaningless after a set operation.
            attribs["freq"] = None
        return self._shallow_copy_with_infer(the_diff, **attribs)
2581 def _assert_can_do_setop(self, other):
2582 if not is_list_like(other):
2583 raise TypeError("Input must be Index or array-like")
2584 return True
2586 def _convert_can_do_setop(self, other):
2587 if not isinstance(other, Index):
2588 other = Index(other, name=self.name)
2589 result_name = self.name
2590 else:
2591 result_name = get_op_result_name(self, other)
2592 return other, result_name
2594 # --------------------------------------------------------------------
2595 # Indexing Methods
    # Shared docstring, attached to ``get_loc`` below via @Appender.
    _index_shared_docs[
        "get_loc"
    ] = """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        tolerance : int or float, optional
            Maximum distance from index value for inexact matches. The value of
            the index at the matching location most satisfy the equation
            ``abs(index[loc] - key) <= tolerance``.

            .. versionadded:: 0.21.0 (list-like tolerance)

        Returns
        -------
        loc : int if unique index, slice if monotonic index, else mask

        Examples
        --------
        >>> unique_index = pd.Index(list('abc'))
        >>> unique_index.get_loc('b')
        1

        >>> monotonic_index = pd.Index(list('abbc'))
        >>> monotonic_index.get_loc('b')
        slice(1, 3, None)

        >>> non_monotonic_index = pd.Index(list('abcb'))
        >>> non_monotonic_index.get_loc('b')
        array([False, True, False, True], dtype=bool)
        """
    @Appender(_index_shared_docs["get_loc"])
    def get_loc(self, key, method=None, tolerance=None):
        if method is None:
            if tolerance is not None:
                raise ValueError(
                    "tolerance argument only valid if using pad, "
                    "backfill or nearest lookups"
                )
            # Exact lookup via the hash-table engine; retry once after
            # casting the key (e.g. float key against an integer index).
            try:
                return self._engine.get_loc(key)
            except KeyError:
                return self._engine.get_loc(self._maybe_cast_indexer(key))
        # Inexact methods are routed through get_indexer on a length-1 target.
        indexer = self.get_indexer([key], method=method, tolerance=tolerance)
        if indexer.ndim > 1 or indexer.size > 1:
            raise TypeError("get_loc requires scalar valued input")
        loc = indexer.item()
        if loc == -1:
            # -1 is get_indexer's sentinel for "not found".
            raise KeyError(key)
        return loc
    # Shared docstring, attached to ``get_indexer`` below via @Appender;
    # %-formatted with _index_doc_kwargs (target_klass, raises_section).
    _index_shared_docs[
        "get_indexer"
    ] = """
        Compute indexer and mask for new index given the current index. The
        indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations most
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

            .. versionadded:: 0.21.0 (list-like tolerance)

        Returns
        -------
        indexer : ndarray of int
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        %(raises_section)s
        Examples
        --------
        >>> index = pd.Index(['c', 'a', 'b'])
        >>> index.get_indexer(['a', 'b', 'x'])
        array([ 1, 2, -1])

        Notice that the return value is an array of locations in ``index``
        and ``x`` is marked by -1, as it is not in ``index``.
        """
    @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
    def get_indexer(self, target, method=None, limit=None, tolerance=None):
        method = missing.clean_reindex_fill_method(method)
        target = ensure_index(target)
        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance, target)

        # Treat boolean labels passed to a numeric index as not found. Without
        # this fix False and True would be treated as 0 and 1 respectively.
        # (GH #16877)
        if target.is_boolean() and self.is_numeric():
            return ensure_platform_int(np.repeat(-1, target.size))

        # If either side needs promotion to a common subclass, promote both
        # and retry on the promoted pair.
        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            return pself.get_indexer(
                ptarget, method=method, limit=limit, tolerance=tolerance
            )

        # Mismatched dtypes: compare as object dtype and retry.
        if not is_dtype_equal(self.dtype, target.dtype):
            this = self.astype(object)
            target = target.astype(object)
            return this.get_indexer(
                target, method=method, limit=limit, tolerance=tolerance
            )

        if not self.is_unique:
            raise InvalidIndexError(
                "Reindexing only valid with uniquely valued Index objects"
            )

        if method == "pad" or method == "backfill":
            indexer = self._get_fill_indexer(target, method, limit, tolerance)
        elif method == "nearest":
            indexer = self._get_nearest_indexer(target, limit, tolerance)
        else:
            # Exact matching: tolerance/limit only apply to fill methods.
            if tolerance is not None:
                raise ValueError(
                    "tolerance argument only valid if doing pad, "
                    "backfill or nearest reindexing"
                )
            if limit is not None:
                raise ValueError(
                    "limit argument only valid if doing pad, "
                    "backfill or nearest reindexing"
                )

            indexer = self._engine.get_indexer(target._ndarray_values)

        return ensure_platform_int(indexer)
2757 def _convert_tolerance(self, tolerance, target):
2758 # override this method on subclasses
2759 tolerance = np.asarray(tolerance)
2760 if target.size != tolerance.size and tolerance.size > 1:
2761 raise ValueError("list-like tolerance size must match target index size")
2762 return tolerance
    def _get_fill_indexer(self, target, method, limit=None, tolerance=None):
        """
        Compute a pad/backfill indexer for *target*.

        Uses the fast engine-based path when both indexes are monotonic
        increasing; otherwise falls back to the searchsorted implementation.
        An optional tolerance masks matches that are too far away (-1).
        """
        if self.is_monotonic_increasing and target.is_monotonic_increasing:
            # Pick the engine routine matching the requested fill direction.
            method = (
                self._engine.get_pad_indexer
                if method == "pad"
                else self._engine.get_backfill_indexer
            )
            indexer = method(target._ndarray_values, limit)
        else:
            indexer = self._get_fill_indexer_searchsorted(target, method, limit)
        if tolerance is not None:
            indexer = self._filter_indexer_tolerance(
                target._ndarray_values, indexer, tolerance
            )
        return indexer
    def _get_fill_indexer_searchsorted(self, target, method, limit=None):
        """
        Fallback pad/backfill get_indexer that works for monotonic decreasing
        indexes and non-monotonic targets.

        Parameters
        ----------
        target : Index-like
        method : {"pad", "backfill"}
        limit : must be None; a limit is only well-defined on the fast
            monotonic path, so any other value raises ValueError.
        """
        if limit is not None:
            raise ValueError(
                f"limit argument for {repr(method)} method only well-defined "
                "if index and target are monotonic"
            )

        side = "left" if method == "pad" else "right"

        # find exact matches first (this simplifies the algorithm)
        indexer = self.get_indexer(target)
        nonexact = indexer == -1
        indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
        if side == "left":
            # searchsorted returns "indices into a sorted array such that,
            # if the corresponding elements in v were inserted before the
            # indices, the order of a would be preserved".
            # Thus, we need to subtract 1 to find values to the left.
            indexer[nonexact] -= 1
            # This also mapped not found values (values of 0 from
            # np.searchsorted) to -1, which conveniently is also our
            # sentinel for missing values
        else:
            # Mark indices to the right of the largest value as not found
            indexer[indexer == len(self)] = -1
        return indexer
    def _get_nearest_indexer(self, target, limit, tolerance):
        """
        Get the indexer for the nearest index labels; requires an index with
        values that can be subtracted from each other (e.g., not strings or
        tuples).
        """
        # Candidate match on each side of every target value.
        left_indexer = self.get_indexer(target, "pad", limit=limit)
        right_indexer = self.get_indexer(target, "backfill", limit=limit)

        target_values = target._values
        left_distances = np.abs(self._values[left_indexer] - target_values)
        right_distances = np.abs(self._values[right_indexer] - target_values)

        # Ties prefer the larger index value: use strict-less-than when the
        # index is increasing, less-or-equal otherwise.
        op = operator.lt if self.is_monotonic_increasing else operator.le
        indexer = np.where(
            op(left_distances, right_distances) | (right_indexer == -1),
            left_indexer,
            right_indexer,
        )
        if tolerance is not None:
            indexer = self._filter_indexer_tolerance(target_values, indexer, tolerance)
        return indexer
2834 def _filter_indexer_tolerance(self, target, indexer, tolerance):
2835 distance = abs(self._values[indexer] - target)
2836 indexer = np.where(distance <= tolerance, indexer, -1)
2837 return indexer
2839 # --------------------------------------------------------------------
2840 # Indexer Conversion Methods
    # Shared docstring, attached to ``_convert_scalar_indexer`` below.
    _index_shared_docs[
        "_convert_scalar_indexer"
    ] = """
        Convert a scalar indexer.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """
    @Appender(_index_shared_docs["_convert_scalar_indexer"])
    def _convert_scalar_indexer(self, key, kind=None):
        assert kind in ["ix", "loc", "getitem", "iloc", None]

        if kind == "iloc":
            # Positional indexing: the key must be a valid integer position.
            return self._validate_indexer("positional", key, kind)

        if len(self) and not isinstance(self, ABCMultiIndex):

            # we can raise here if we are definitive that this
            # is positional indexing (eg. .ix on with a float)
            # or label indexing if we are using a type able
            # to be represented in the index

            if kind in ["getitem", "ix"] and is_float(key):
                if not self.is_floating():
                    return self._invalid_indexer("label", key)

            elif kind in ["loc"] and is_float(key):

                # we want to raise KeyError on string/mixed here
                # technically we *could* raise a TypeError
                # on anything but mixed though
                if self.inferred_type not in [
                    "floating",
                    "mixed-integer-float",
                    "integer-na",
                    "string",
                    "unicode",
                    "mixed",
                ]:
                    self._invalid_indexer("label", key)

            elif kind in ["loc"] and is_integer(key):
                if not self.holds_integer():
                    self._invalid_indexer("label", key)

        # Key is acceptable as a label: return it unchanged.
        return key
    # Shared docstring, attached to ``_convert_slice_indexer`` below.
    _index_shared_docs[
        "_convert_slice_indexer"
    ] = """
        Convert a slice indexer.

        By definition, these are labels unless 'iloc' is passed in.
        Floats are not allowed as the start, step, or stop of the slice.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """
    @Appender(_index_shared_docs["_convert_slice_indexer"])
    def _convert_slice_indexer(self, key: slice, kind=None):
        assert kind in ["ix", "loc", "getitem", "iloc", None]

        # validate iloc
        if kind == "iloc":
            return slice(
                self._validate_indexer("slice", key.start, kind),
                self._validate_indexer("slice", key.stop, kind),
                self._validate_indexer("slice", key.step, kind),
            )

        # potentially cast the bounds to integers
        start, stop, step = key.start, key.stop, key.step

        # figure out if this is a positional indexer
        def is_int(v):
            # None is acceptable anywhere in a slice.
            return v is None or is_integer(v)

        is_null_slicer = start is None and stop is None
        is_index_slice = is_int(start) and is_int(stop)
        is_positional = is_index_slice and not (
            self.is_integer() or self.is_categorical()
        )

        if kind == "getitem":
            """
            called from the getitem slicers, validate that we are in fact
            integers
            """
            if self.is_integer() or is_index_slice:
                return slice(
                    self._validate_indexer("slice", key.start, kind),
                    self._validate_indexer("slice", key.stop, kind),
                    self._validate_indexer("slice", key.step, kind),
                )

        # convert the slice to an indexer here

        # if we are mixed and have integers
        try:
            if is_positional and self.is_mixed():
                # Validate start & stop
                if start is not None:
                    self.get_loc(start)
                if stop is not None:
                    self.get_loc(stop)
                # Both bounds resolve as labels: treat the slice label-based.
                is_positional = False
        except KeyError:
            if self.inferred_type in ["mixed-integer-float", "integer-na"]:
                raise

        if is_null_slicer:
            # slice(None, None, ...) needs no conversion.
            indexer = key
        elif is_positional:
            indexer = key
        else:
            indexer = self.slice_indexer(start, stop, step, kind=kind)

        return indexer
2967 def _convert_listlike_indexer(self, keyarr, kind=None):
2968 """
2969 Parameters
2970 ----------
2971 keyarr : list-like
2972 Indexer to convert.
2974 Returns
2975 -------
2976 indexer : numpy.ndarray or None
2977 Return an ndarray or None if cannot convert.
2978 keyarr : numpy.ndarray
2979 Return tuple-safe keys.
2980 """
2981 if isinstance(keyarr, Index):
2982 keyarr = self._convert_index_indexer(keyarr)
2983 else:
2984 keyarr = self._convert_arr_indexer(keyarr)
2986 indexer = self._convert_list_indexer(keyarr, kind=kind)
2987 return indexer, keyarr
    # Shared docstring, attached to ``_convert_arr_indexer`` below.
    _index_shared_docs[
        "_convert_arr_indexer"
    ] = """
        Convert an array-like indexer to the appropriate dtype.

        Parameters
        ----------
        keyarr : array-like
            Indexer to convert.

        Returns
        -------
        converted_keyarr : array-like
        """
3004 @Appender(_index_shared_docs["_convert_arr_indexer"])
3005 def _convert_arr_indexer(self, keyarr):
3006 keyarr = com.asarray_tuplesafe(keyarr)
3007 return keyarr
    # Shared docstring, attached to ``_convert_index_indexer`` below.
    _index_shared_docs[
        "_convert_index_indexer"
    ] = """
        Convert an Index indexer to the appropriate dtype.

        Parameters
        ----------
        keyarr : Index (or sub-class)
            Indexer to convert.

        Returns
        -------
        converted_keyarr : Index (or sub-class)
        """
    @Appender(_index_shared_docs["_convert_index_indexer"])
    def _convert_index_indexer(self, keyarr):
        # The base Index needs no conversion; subclasses override as needed.
        return keyarr
    # Shared docstring, attached to ``_convert_list_indexer`` below.
    _index_shared_docs[
        "_convert_list_indexer"
    ] = """
        Convert a list-like indexer to the appropriate dtype.

        Parameters
        ----------
        keyarr : Index (or sub-class)
            Indexer to convert.
        kind : iloc, ix, loc, optional

        Returns
        -------
        positional indexer or None
        """
    @Appender(_index_shared_docs["_convert_list_indexer"])
    def _convert_list_indexer(self, keyarr, kind=None):
        # Integer keys against a non-float, non-Period index may be
        # interpreted positionally for these kinds.
        if (
            kind in [None, "iloc", "ix"]
            and is_integer_dtype(keyarr)
            and not self.is_floating()
            and not isinstance(keyarr, ABCPeriodIndex)
        ):

            if self.inferred_type == "mixed-integer":
                indexer = self.get_indexer(keyarr)
                if (indexer >= 0).all():
                    # Every key resolved as a label.
                    return indexer
                # missing values are flagged as -1 by get_indexer and negative
                # indices are already converted to positive indices in the
                # above if-statement, so the negative flags are changed to
                # values outside the range of indices so as to trigger an
                # IndexError in maybe_convert_indices
                indexer[indexer < 0] = len(self)

                return maybe_convert_indices(indexer, len(self))

            elif not self.inferred_type == "integer":
                # Treat the keys positionally; normalize negative positions.
                keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr)
                return keyarr

        # No positional conversion applies: caller falls back to labels.
        return None
3072 def _invalid_indexer(self, form, key):
3073 """
3074 Consistent invalid indexer message.
3075 """
3076 raise TypeError(
3077 f"cannot do {form} indexing on {type(self)} with these "
3078 f"indexers [{key}] of {type(key)}"
3079 )
3081 # --------------------------------------------------------------------
3082 # Reindex Methods
3084 def _can_reindex(self, indexer):
3085 """
3086 Check if we are allowing reindexing with this particular indexer.
3088 Parameters
3089 ----------
3090 indexer : an integer indexer
3092 Raises
3093 ------
3094 ValueError if its a duplicate axis
3095 """
3097 # trying to reindex on an axis with duplicates
3098 if not self.is_unique and len(indexer):
3099 raise ValueError("cannot reindex from a duplicate axis")
    def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
        """
        Create index with target's values (move/add/delete values
        as necessary).

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray or None
            Indices of output values in original index.
        """
        # GH6552: preserve names when reindexing to non-named target
        # (i.e. neither Index nor Series).
        preserve_names = not hasattr(target, "name")

        # GH7774: preserve dtype/tz if target is empty and not an Index.
        target = _ensure_has_len(target)  # target may be an iterator

        if not isinstance(target, Index) and len(target) == 0:
            attrs = self._get_attributes_dict()
            attrs.pop("freq", None)  # don't preserve freq
            values = self._data[:0]  # appropriately-dtyped empty array
            target = self._simple_new(values, dtype=self.dtype, **attrs)
        else:
            target = ensure_index(target)

        if level is not None:
            if method is not None:
                raise TypeError("Fill method not supported if level passed")
            # Reindex on a single level via a right join.
            _, indexer, _ = self._join_level(
                target, level, how="right", return_indexers=True
            )
        else:
            if self.equals(target):
                # Identical indexes need no realignment.
                indexer = None
            else:
                # check is_overlapping for IntervalIndex compat
                if self.is_unique and not getattr(self, "is_overlapping", False):
                    indexer = self.get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                else:
                    if method is not None or limit is not None:
                        raise ValueError(
                            "cannot reindex a non-unique index "
                            "with a method or limit"
                        )
                    indexer, missing = self.get_indexer_non_unique(target)

        if preserve_names and target.nlevels == 1 and target.name != self.name:
            target = target.copy()
            target.name = self.name

        return target, indexer
    def _reindex_non_unique(self, target):
        """
        Create a new index with target's values (move/add/delete values as
        necessary) use with non-unique Index and a possibly non-unique target.

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray or None
            Indices of output values in original index.

        """

        target = ensure_index(target)
        indexer, missing = self.get_indexer_non_unique(target)
        # Boolean mask of positions in `indexer` that found a match.
        check = indexer != -1
        new_labels = self.take(indexer[check])
        new_indexer = None

        if len(missing):
            length = np.arange(len(indexer))

            missing = ensure_platform_int(missing)
            missing_labels = target.take(missing)
            missing_indexer = ensure_int64(length[~check])
            cur_labels = self.take(indexer[check]).values
            cur_indexer = ensure_int64(length[check])

            # Interleave matched labels and missing labels in target order.
            new_labels = np.empty(tuple([len(indexer)]), dtype=object)
            new_labels[cur_indexer] = cur_labels
            new_labels[missing_indexer] = missing_labels

            # a unique indexer
            if target.is_unique:

                # see GH5553, make sure we use the right indexer
                new_indexer = np.arange(len(indexer))
                new_indexer[cur_indexer] = np.arange(len(cur_labels))
                new_indexer[missing_indexer] = -1

            # we have a non_unique selector, need to use the original
            # indexer here
            else:

                # need to retake to have the same size as the indexer
                indexer[~check] = -1

                # reset the new indexer to account for the new size
                new_indexer = np.arange(len(self.take(indexer)))
                new_indexer[~check] = -1

        new_index = self._shallow_copy_with_infer(new_labels)
        return new_index, indexer, new_indexer
3220 # --------------------------------------------------------------------
3221 # Join Methods
    # Shared docstring, attached to ``join`` below via @Appender.
    _index_shared_docs[
        "join"
    ] = """
        Compute join_index and indexers to conform data
        structures to the new index.

        Parameters
        ----------
        other : Index
        how : {'left', 'right', 'inner', 'outer'}
        level : int or level name, default None
        return_indexers : bool, default False
        sort : bool, default False
            Sort the join keys lexicographically in the result Index. If False,
            the order of the join keys depends on the join type (how keyword).

        Returns
        -------
        join_index, (left_indexer, right_indexer)
        """
    @Appender(_index_shared_docs["join"])
    def join(self, other, how="left", level=None, return_indexers=False, sort=False):
        self_is_mi = isinstance(self, ABCMultiIndex)
        other_is_mi = isinstance(other, ABCMultiIndex)

        # try to figure out the join level
        # GH3662
        if level is None and (self_is_mi or other_is_mi):

            # have the same levels/names so a simple join
            if self.names == other.names:
                pass
            else:
                return self._join_multi(other, how=how, return_indexers=return_indexers)

        # join on the level
        if level is not None and (self_is_mi or other_is_mi):
            return self._join_level(
                other, level, how=how, return_indexers=return_indexers
            )

        other = ensure_index(other)

        # Degenerate cases: one side empty.
        if len(other) == 0 and how in ("left", "outer"):
            join_index = self._shallow_copy()
            if return_indexers:
                rindexer = np.repeat(-1, len(join_index))
                return join_index, None, rindexer
            else:
                return join_index

        if len(self) == 0 and how in ("right", "outer"):
            join_index = other._shallow_copy()
            if return_indexers:
                lindexer = np.repeat(-1, len(join_index))
                return join_index, lindexer, None
            else:
                return join_index

        # Delegate to the higher-precedence index: flip the join direction,
        # then swap the returned left/right indexers back.
        if self._join_precedence < other._join_precedence:
            how = {"right": "left", "left": "right"}.get(how, how)
            result = other.join(
                self, how=how, level=level, return_indexers=return_indexers
            )
            if return_indexers:
                x, y, z = result
                result = x, z, y
            return result

        # Mismatched dtypes: join as object dtype.
        if not is_dtype_equal(self.dtype, other.dtype):
            this = self.astype("O")
            other = other.astype("O")
            return this.join(other, how=how, return_indexers=return_indexers)

        _validate_join_method(how)

        # Choose a strategy based on uniqueness/monotonicity of both sides.
        if not self.is_unique and not other.is_unique:
            return self._join_non_unique(
                other, how=how, return_indexers=return_indexers
            )
        elif not self.is_unique or not other.is_unique:
            if self.is_monotonic and other.is_monotonic:
                return self._join_monotonic(
                    other, how=how, return_indexers=return_indexers
                )
            else:
                return self._join_non_unique(
                    other, how=how, return_indexers=return_indexers
                )
        elif self.is_monotonic and other.is_monotonic:
            try:
                return self._join_monotonic(
                    other, how=how, return_indexers=return_indexers
                )
            except TypeError:
                pass

        # Generic fallback: build the join index via set operations.
        if how == "left":
            join_index = self
        elif how == "right":
            join_index = other
        elif how == "inner":
            # TODO: sort=False here for backwards compat. It may
            # be better to use the sort parameter passed into join
            join_index = self.intersection(other, sort=False)
        elif how == "outer":
            # TODO: sort=True here for backwards compat. It may
            # be better to use the sort parameter passed into join
            join_index = self.union(other)

        if sort:
            join_index = join_index.sort_values()

        if return_indexers:
            if join_index is self:
                lindexer = None
            else:
                lindexer = self.get_indexer(join_index)
            if join_index is other:
                rindexer = None
            else:
                rindexer = other.get_indexer(join_index)
            return join_index, lindexer, rindexer
        else:
            return join_index
    def _join_multi(self, other, how, return_indexers=True):
        """
        Join self with other when at least one side is a MultiIndex,
        matching on the level names the two sides share.

        If both sides are MultiIndexes, the non-overlapping levels are
        dropped for the join and restored afterwards.  If only one side is
        a MultiIndex, this delegates to ``_join_level`` on the single
        shared level name.

        Raises
        ------
        ValueError
            If self and other have no level names in common.
        """
        from pandas.core.indexes.multi import MultiIndex
        from pandas.core.reshape.merge import _restore_dropped_levels_multijoin

        # figure out join names
        self_names = set(com.not_none(*self.names))
        other_names = set(com.not_none(*other.names))
        overlap = self_names & other_names

        # need at least 1 in common
        if not overlap:
            raise ValueError("cannot join with no overlapping index names")

        self_is_mi = isinstance(self, MultiIndex)
        other_is_mi = isinstance(other, MultiIndex)

        if self_is_mi and other_is_mi:

            # Drop the non-matching levels from left and right respectively
            ldrop_names = list(self_names - overlap)
            rdrop_names = list(other_names - overlap)

            # if only the order differs
            if not len(ldrop_names + rdrop_names):
                self_jnlevels = self
                other_jnlevels = other.reorder_levels(self.names)
            else:
                self_jnlevels = self.droplevel(ldrop_names)
                other_jnlevels = other.droplevel(rdrop_names)

            # Join left and right
            # Join on same leveled multi-index frames is supported
            join_idx, lidx, ridx = self_jnlevels.join(
                other_jnlevels, how, return_indexers=True
            )

            # Restore the dropped levels
            # Returned index level order is
            # common levels, ldrop_names, rdrop_names
            dropped_names = ldrop_names + rdrop_names

            levels, codes, names = _restore_dropped_levels_multijoin(
                self, other, dropped_names, join_idx, lidx, ridx
            )

            # Re-create the multi-index
            multi_join_idx = MultiIndex(
                levels=levels, codes=codes, names=names, verify_integrity=False
            )

            multi_join_idx = multi_join_idx.remove_unused_levels()

            return multi_join_idx, lidx, ridx

        # NOTE(review): ``overlap`` is a set, so when more than one name
        # overlaps the pick below is arbitrary (set iteration order).
        jl = list(overlap)[0]

        # Case where only one index is multi
        # make the indices into mi's that match
        flip_order = False
        if self_is_mi:
            self, other = other, self
            flip_order = True
            # flip if join method is right or left
            how = {"right": "left", "left": "right"}.get(how, how)

        level = other.names.index(jl)
        result = self._join_level(
            other, level, how=how, return_indexers=return_indexers
        )

        if flip_order:
            if isinstance(result, tuple):
                # swap the left/right indexers back to match the caller's view
                return result[0], result[2], result[1]
        return result
3425 def _join_non_unique(self, other, how="left", return_indexers=False):
3426 from pandas.core.reshape.merge import _get_join_indexers
3428 left_idx, right_idx = _get_join_indexers(
3429 [self._ndarray_values], [other._ndarray_values], how=how, sort=True
3430 )
3432 left_idx = ensure_platform_int(left_idx)
3433 right_idx = ensure_platform_int(right_idx)
3435 join_index = np.asarray(self._ndarray_values.take(left_idx))
3436 mask = left_idx == -1
3437 np.putmask(join_index, mask, other._ndarray_values.take(right_idx))
3439 join_index = self._wrap_joined_index(join_index, other)
3441 if return_indexers:
3442 return join_index, left_idx, right_idx
3443 else:
3444 return join_index
    def _join_level(
        self, other, level, how="left", return_indexers=False, keep_order=True
    ):
        """
        The join method *only* affects the level of the resulting
        MultiIndex. Otherwise it just exactly aligns the Index data to the
        labels of the level in the MultiIndex.

        If ```keep_order == True```, the order of the data indexed by the
        MultiIndex will not be changed; otherwise, it will tie out
        with `other`.

        Raises TypeError when both sides are MultiIndexes; the non-Multi
        side is swapped to the right internally (``flip_order``) and the
        indexers are swapped back before returning.
        """
        from pandas.core.indexes.multi import MultiIndex

        def _get_leaf_sorter(labels):
            """
            Returns sorter for the inner most level while preserving the
            order of higher levels.
            """
            if labels[0].size == 0:
                return np.empty(0, dtype="int64")

            if len(labels) == 1:
                lab = ensure_int64(labels[0])
                sorter, _ = libalgos.groupsort_indexer(lab, 1 + lab.max())
                return sorter

            # find indexers of beginning of each set of
            # same-key labels w.r.t all but last level
            tic = labels[0][:-1] != labels[0][1:]
            for lab in labels[1:-1]:
                tic |= lab[:-1] != lab[1:]

            starts = np.hstack(([True], tic, [True])).nonzero()[0]
            lab = ensure_int64(labels[-1])
            return lib.get_level_sorter(lab, ensure_int64(starts))

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            raise TypeError("Join on level between two MultiIndex objects is ambiguous")

        left, right = self, other

        # normalize so that ``left`` is the MultiIndex; if we swap, the
        # join direction and the returned indexers must be flipped too
        flip_order = not isinstance(self, MultiIndex)
        if flip_order:
            left, right = right, left
            how = {"right": "left", "left": "right"}.get(how, how)

        level = left._get_level_number(level)
        old_level = left.levels[level]

        if not right.is_unique:
            raise NotImplementedError(
                "Index._join_level on non-unique index is not implemented"
            )

        # join the flat ``right`` index against the level's values
        new_level, left_lev_indexer, right_lev_indexer = old_level.join(
            right, how=how, return_indexers=True
        )

        if left_lev_indexer is None:
            # the level itself was unchanged by the join
            if keep_order or len(left) == 0:
                left_indexer = None
                join_index = left
            else:  # sort the leaves
                left_indexer = _get_leaf_sorter(left.codes[: level + 1])
                join_index = left[left_indexer]

        else:
            left_lev_indexer = ensure_int64(left_lev_indexer)
            rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))

            # remap the old level codes onto positions in the joined level;
            # codes with no match become -1
            new_lev_codes = algos.take_nd(
                rev_indexer, left.codes[level], allow_fill=False
            )

            new_codes = list(left.codes)
            new_codes[level] = new_lev_codes

            new_levels = list(left.levels)
            new_levels[level] = new_level

            if keep_order:  # just drop missing values. o.w. keep order
                left_indexer = np.arange(len(left), dtype=np.intp)
                mask = new_lev_codes != -1
                if not mask.all():
                    new_codes = [lab[mask] for lab in new_codes]
                    left_indexer = left_indexer[mask]

            else:  # tie out the order with other
                if level == 0:  # outer most level, take the fast route
                    ngroups = 1 + new_lev_codes.max()
                    left_indexer, counts = libalgos.groupsort_indexer(
                        new_lev_codes, ngroups
                    )

                    # missing values are placed first; drop them!
                    left_indexer = left_indexer[counts[0] :]
                    new_codes = [lab[left_indexer] for lab in new_codes]

                else:  # sort the leaves
                    mask = new_lev_codes != -1
                    mask_all = mask.all()
                    if not mask_all:
                        new_codes = [lab[mask] for lab in new_codes]

                    left_indexer = _get_leaf_sorter(new_codes[: level + 1])
                    new_codes = [lab[left_indexer] for lab in new_codes]

                    # left_indexers are w.r.t masked frame.
                    # reverse to original frame!
                    if not mask_all:
                        left_indexer = mask.nonzero()[0][left_indexer]

            join_index = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=left.names,
                verify_integrity=False,
            )

        if right_lev_indexer is not None:
            right_indexer = algos.take_nd(
                right_lev_indexer, join_index.codes[level], allow_fill=False
            )
        else:
            right_indexer = join_index.codes[level]

        if flip_order:
            left_indexer, right_indexer = right_indexer, left_indexer

        if return_indexers:
            left_indexer = (
                None if left_indexer is None else ensure_platform_int(left_indexer)
            )
            right_indexer = (
                None if right_indexer is None else ensure_platform_int(right_indexer)
            )
            return join_index, left_indexer, right_indexer
        else:
            return join_index
    def _join_monotonic(self, other, how="left", return_indexers=False):
        """
        Join two monotonic indexes using the fast C pointer-walk indexers.

        The caller is responsible for ensuring both sides are monotonic;
        a unique/unique pair takes a faster path than the general one.
        """
        if self.equals(other):
            # identical indexes: no realignment needed on either side
            ret_index = other if how == "right" else self
            if return_indexers:
                return ret_index, None, None
            else:
                return ret_index

        sv = self._ndarray_values
        ov = other._ndarray_values

        if self.is_unique and other.is_unique:
            # We can perform much better than the general case
            if how == "left":
                join_index = self
                lidx = None
                ridx = self._left_indexer_unique(sv, ov)
            elif how == "right":
                join_index = other
                lidx = self._left_indexer_unique(ov, sv)
                ridx = None
            elif how == "inner":
                join_index, lidx, ridx = self._inner_indexer(sv, ov)
                join_index = self._wrap_joined_index(join_index, other)
            elif how == "outer":
                join_index, lidx, ridx = self._outer_indexer(sv, ov)
                join_index = self._wrap_joined_index(join_index, other)
        else:
            if how == "left":
                join_index, lidx, ridx = self._left_indexer(sv, ov)
            elif how == "right":
                # a right join is a left join with the operands (and the
                # returned indexers) swapped
                join_index, ridx, lidx = self._left_indexer(ov, sv)
            elif how == "inner":
                join_index, lidx, ridx = self._inner_indexer(sv, ov)
            elif how == "outer":
                join_index, lidx, ridx = self._outer_indexer(sv, ov)
            join_index = self._wrap_joined_index(join_index, other)

        if return_indexers:
            lidx = None if lidx is None else ensure_platform_int(lidx)
            ridx = None if ridx is None else ensure_platform_int(ridx)
            return join_index, lidx, ridx
        else:
            return join_index
3632 def _wrap_joined_index(self, joined, other):
3633 name = get_op_result_name(self, other)
3634 return Index(joined, name=name)
3636 # --------------------------------------------------------------------
3637 # Uncategorized Methods
3639 @property
3640 def values(self):
3641 """
3642 Return an array representing the data in the Index.
3644 .. warning::
3646 We recommend using :attr:`Index.array` or
3647 :meth:`Index.to_numpy`, depending on whether you need
3648 a reference to the underlying data or a NumPy array.
3650 Returns
3651 -------
3652 array: numpy.ndarray or ExtensionArray
3654 See Also
3655 --------
3656 Index.array : Reference to the underlying data.
3657 Index.to_numpy : A NumPy array representing the underlying data.
3658 """
3659 return self._data.view(np.ndarray)
3661 @cache_readonly
3662 @Appender(IndexOpsMixin.array.__doc__) # type: ignore
3663 def array(self) -> ExtensionArray:
3664 array = self._data
3665 if isinstance(array, np.ndarray):
3666 from pandas.core.arrays.numpy_ import PandasArray
3668 array = PandasArray(array)
3669 return array
3671 @property
3672 def _values(self) -> Union[ExtensionArray, ABCIndexClass, np.ndarray]:
3673 # TODO(EA): remove index types as they become extension arrays
3674 """
3675 The best array representation.
3677 This is an ndarray, ExtensionArray, or Index subclass. This differs
3678 from ``_ndarray_values``, which always returns an ndarray.
3680 Both ``_values`` and ``_ndarray_values`` are consistent between
3681 ``Series`` and ``Index``.
3683 It may differ from the public '.values' method.
3685 index | values | _values | _ndarray_values |
3686 ----------------- | --------------- | ------------- | --------------- |
3687 Index | ndarray | ndarray | ndarray |
3688 CategoricalIndex | Categorical | Categorical | ndarray[int] |
3689 DatetimeIndex | ndarray[M8ns] | ndarray[M8ns] | ndarray[M8ns] |
3690 DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] | ndarray[M8ns] |
3691 PeriodIndex | ndarray[object] | PeriodArray | ndarray[int] |
3692 IntervalIndex | IntervalArray | IntervalArray | ndarray[object] |
3694 See Also
3695 --------
3696 values
3697 _ndarray_values
3698 """
3699 return self._data
3701 def _internal_get_values(self):
3702 """
3703 Return `Index` data as an `numpy.ndarray`.
3705 Returns
3706 -------
3707 numpy.ndarray
3708 A one-dimensional numpy array of the `Index` values.
3710 See Also
3711 --------
3712 Index.values : The attribute that _internal_get_values wraps.
3714 Examples
3715 --------
3716 Getting the `Index` values of a `DataFrame`:
3718 >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
3719 ... index=['a', 'b', 'c'], columns=['A', 'B', 'C'])
3720 >>> df
3721 A B C
3722 a 1 2 3
3723 b 4 5 6
3724 c 7 8 9
3725 >>> df.index._internal_get_values()
3726 array(['a', 'b', 'c'], dtype=object)
3728 Standalone `Index` values:
3730 >>> idx = pd.Index(['1', '2', '3'])
3731 >>> idx._internal_get_values()
3732 array(['1', '2', '3'], dtype=object)
3734 `MultiIndex` arrays also have only one dimension:
3736 >>> midx = pd.MultiIndex.from_arrays([[1, 2, 3], ['a', 'b', 'c']],
3737 ... names=('number', 'letter'))
3738 >>> midx._internal_get_values()
3739 array([(1, 'a'), (2, 'b'), (3, 'c')], dtype=object)
3740 >>> midx._internal_get_values().ndim
3741 1
3742 """
3743 return self.values
    @Appender(IndexOpsMixin.memory_usage.__doc__)
    def memory_usage(self, deep=False):
        # base measurement comes from IndexOpsMixin; on top of that we add
        # the hashtable engine this index keeps for label lookups
        result = super().memory_usage(deep=deep)

        # include our engine hashtable
        result += self._engine.sizeof(deep=deep)
        return result
    # Shared docstring for ``where``; interpolated onto the method below
    # (and subclass overrides) via @Appender.
    _index_shared_docs[
        "where"
    ] = """
        Return an Index of same shape as self and whose corresponding
        entries are from self where cond is True and otherwise are from
        other.

        Parameters
        ----------
        cond : bool array-like with the same length as self
        other : scalar, or array-like

        Returns
        -------
        Index
        """
3770 @Appender(_index_shared_docs["where"])
3771 def where(self, cond, other=None):
3772 if other is None:
3773 other = self._na_value
3775 dtype = self.dtype
3776 values = self.values
3778 if is_bool(other) or is_bool_dtype(other):
3780 # bools force casting
3781 values = values.astype(object)
3782 dtype = None
3784 values = np.where(cond, values, other)
3786 if self._is_numeric_dtype and np.any(isna(values)):
3787 # We can't coerce to the numeric dtype of "self" (unless
3788 # it's float) if there are NaN values in our output.
3789 dtype = None
3791 return self._shallow_copy_with_infer(values, dtype=dtype)
3793 # construction helpers
3794 @classmethod
3795 def _scalar_data_error(cls, data):
3796 # We return the TypeError so that we can raise it from the constructor
3797 # in order to keep mypy happy
3798 return TypeError(
3799 f"{cls.__name__}(...) must be called with a collection of some "
3800 f"kind, {repr(data)} was passed"
3801 )
3803 @classmethod
3804 def _string_data_error(cls, data):
3805 raise TypeError(
3806 "String dtype not supported, you may need "
3807 "to explicitly cast to a numeric type"
3808 )
3810 def _coerce_scalar_to_index(self, item):
3811 """
3812 We need to coerce a scalar to a compat for our index type.
3814 Parameters
3815 ----------
3816 item : scalar item to coerce
3817 """
3818 dtype = self.dtype
3820 if self._is_numeric_dtype and isna(item):
3821 # We can't coerce to the numeric dtype of "self" (unless
3822 # it's float) if there are NaN values in our output.
3823 dtype = None
3825 return Index([item], dtype=dtype, **self._get_attributes_dict())
3827 def _to_safe_for_reshape(self):
3828 """
3829 Convert to object if we are a categorical.
3830 """
3831 return self
3833 def _convert_for_op(self, value):
3834 """
3835 Convert value to be insertable to ndarray.
3836 """
3837 return value
3839 def _assert_can_do_op(self, value):
3840 """
3841 Check value is valid for scalar op.
3842 """
3843 if not is_scalar(value):
3844 raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
3846 @property
3847 def _has_complex_internals(self):
3848 """
3849 Indicates if an index is not directly backed by a numpy array
3850 """
3851 # used to avoid libreduction code paths, which raise or require conversion
3852 return False
3854 def _is_memory_usage_qualified(self) -> bool:
3855 """
3856 Return a boolean if we need a qualified .info display.
3857 """
3858 return self.is_object()
3860 def is_type_compatible(self, kind) -> bool:
3861 """
3862 Whether the index type is compatible with the provided type.
3863 """
3864 return kind == self.inferred_type
    # Shared docstring for ``__contains__``; interpolated with
    # _index_doc_kwargs via @Appender on the method below.
    _index_shared_docs[
        "contains"
    ] = """
        Return a boolean indicating whether the provided key is in the index.

        Parameters
        ----------
        key : label
            The key to check if it is present in the index.

        Returns
        -------
        bool
            Whether the key search is in the index.

        See Also
        --------
        Index.isin : Returns an ndarray of boolean dtype indicating whether the
            list-like key is in the index.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx
        Int64Index([1, 2, 3, 4], dtype='int64')

        >>> 2 in idx
        True
        >>> 6 in idx
        False
        """
3898 @Appender(_index_shared_docs["contains"] % _index_doc_kwargs)
3899 def __contains__(self, key) -> bool:
3900 hash(key)
3901 try:
3902 return key in self._engine
3903 except (OverflowError, TypeError, ValueError):
3904 return False
3906 def __hash__(self):
3907 raise TypeError(f"unhashable type: {repr(type(self).__name__)}")
3909 def __setitem__(self, key, value):
3910 raise TypeError("Index does not support mutable operations")
    def __getitem__(self, key):
        """
        Override numpy.ndarray's __getitem__ method to work as desired.

        This function adds lists and Series as valid boolean indexers
        (ndarrays only supports ndarray with dtype=bool).

        If resulting ndim != 1, plain ndarray is returned instead of
        corresponding `Index` subclass.

        """
        # There's no custom logic to be implemented in __getslice__, so it's
        # not overloaded intentionally.
        getitem = self._data.__getitem__
        promote = self._shallow_copy

        if is_scalar(key):
            # scalar lookup returns an element, not an Index
            key = com.cast_scalar_indexer(key)
            return getitem(key)

        if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # pessimization of basic indexing.
            return promote(getitem(key))

        if com.is_bool_indexer(key):
            key = np.asarray(key, dtype=bool)

        key = com.values_from_object(key)
        result = getitem(key)
        if not is_scalar(result):
            if np.ndim(result) > 1:
                # multi-dimensional results cannot be wrapped in an Index;
                # warn (deprecation) and hand back the raw ndarray
                deprecate_ndim_indexing(result)
                return result
            return promote(result)
        else:
            return result
3950 def _can_hold_identifiers_and_holds_name(self, name) -> bool:
3951 """
3952 Faster check for ``name in self`` when we know `name` is a Python
3953 identifier (e.g. in NDFrame.__getattr__, which hits this to support
3954 . key lookup). For indexes that can't hold identifiers (everything
3955 but object & categorical) we just return False.
3957 https://github.com/pandas-dev/pandas/issues/19764
3958 """
3959 if self.is_object() or self.is_categorical():
3960 return name in self
3961 return False
3963 def append(self, other):
3964 """
3965 Append a collection of Index options together.
3967 Parameters
3968 ----------
3969 other : Index or list/tuple of indices
3971 Returns
3972 -------
3973 appended : Index
3974 """
3976 to_concat = [self]
3978 if isinstance(other, (list, tuple)):
3979 to_concat = to_concat + list(other)
3980 else:
3981 to_concat.append(other)
3983 for obj in to_concat:
3984 if not isinstance(obj, Index):
3985 raise TypeError("all inputs must be Index")
3987 names = {obj.name for obj in to_concat}
3988 name = None if len(names) > 1 else self.name
3990 return self._concat(to_concat, name)
3992 def _concat(self, to_concat, name):
3994 typs = _concat.get_dtype_kinds(to_concat)
3996 if len(typs) == 1:
3997 return self._concat_same_dtype(to_concat, name=name)
3998 return Index._concat_same_dtype(self, to_concat, name=name)
    def _concat_same_dtype(self, to_concat, name):
        """
        Concatenate to_concat which has the same class.
        """
        # must be overridden in specific classes
        klasses = (
            ABCDatetimeIndex,
            ABCTimedeltaIndex,
            ABCPeriodIndex,
            ExtensionArray,
            ABCIntervalIndex,
        )
        # datetime-like / interval / EA-backed pieces are downcast to object
        # here; their subclasses override this method with a smarter concat
        to_concat = [
            x.astype(object) if isinstance(x, klasses) else x for x in to_concat
        ]

        # rebind self to the first piece so attributes come from it
        self = to_concat[0]
        attribs = self._get_attributes_dict()
        attribs["name"] = name

        to_concat = [x._values if isinstance(x, Index) else x for x in to_concat]

        return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs)
4024 def putmask(self, mask, value):
4025 """
4026 Return a new Index of the values set with the mask.
4028 Returns
4029 -------
4030 Index
4032 See Also
4033 --------
4034 numpy.ndarray.putmask
4035 """
4036 values = self.values.copy()
4037 try:
4038 np.putmask(values, mask, self._convert_for_op(value))
4039 return self._shallow_copy(values)
4040 except (ValueError, TypeError) as err:
4041 if is_object_dtype(self):
4042 raise err
4044 # coerces to object
4045 return self.astype(object).putmask(mask, value)
    def equals(self, other) -> bool:
        """
        Determine if two Index objects contain the same elements.

        Returns
        -------
        bool
            True if "other" is an Index and it has the same elements as calling
            index; False otherwise.
        """
        if self.is_(other):
            # same object: trivially equal
            return True

        if not isinstance(other, Index):
            return False

        if is_object_dtype(self) and not is_object_dtype(other):
            # if other is not object, use other's logic for coercion
            return other.equals(self)

        if isinstance(other, ABCMultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            if not is_object_dtype(self.dtype):
                # a flat non-object index has nlevels == 1, so it can never
                # match a MultiIndex with more levels
                if self.nlevels != other.nlevels:
                    return False

        return array_equivalent(
            com.values_from_object(self), com.values_from_object(other)
        )
4077 def identical(self, other) -> bool:
4078 """
4079 Similar to equals, but check that other comparable attributes are
4080 also equal.
4082 Returns
4083 -------
4084 bool
4085 If two Index objects have equal elements and same type True,
4086 otherwise False.
4087 """
4088 return (
4089 self.equals(other)
4090 and all(
4091 (
4092 getattr(self, c, None) == getattr(other, c, None)
4093 for c in self._comparables
4094 )
4095 )
4096 and type(self) == type(other)
4097 )
4099 def asof(self, label):
4100 """
4101 Return the label from the index, or, if not present, the previous one.
4103 Assuming that the index is sorted, return the passed index label if it
4104 is in the index, or return the previous index label if the passed one
4105 is not in the index.
4107 Parameters
4108 ----------
4109 label : object
4110 The label up to which the method returns the latest index label.
4112 Returns
4113 -------
4114 object
4115 The passed label if it is in the index. The previous label if the
4116 passed label is not in the sorted index or `NaN` if there is no
4117 such label.
4119 See Also
4120 --------
4121 Series.asof : Return the latest value in a Series up to the
4122 passed index.
4123 merge_asof : Perform an asof merge (similar to left join but it
4124 matches on nearest key rather than equal key).
4125 Index.get_loc : An `asof` is a thin wrapper around `get_loc`
4126 with method='pad'.
4128 Examples
4129 --------
4130 `Index.asof` returns the latest index label up to the passed label.
4132 >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
4133 >>> idx.asof('2014-01-01')
4134 '2013-12-31'
4136 If the label is in the index, the method returns the passed label.
4138 >>> idx.asof('2014-01-02')
4139 '2014-01-02'
4141 If all of the labels in the index are later than the passed label,
4142 NaN is returned.
4144 >>> idx.asof('1999-01-02')
4145 nan
4147 If the index is not sorted, an error is raised.
4149 >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
4150 ... '2014-01-03'])
4151 >>> idx_not_sorted.asof('2013-12-31')
4152 Traceback (most recent call last):
4153 ValueError: index must be monotonic increasing or decreasing
4154 """
4155 try:
4156 loc = self.get_loc(label, method="pad")
4157 except KeyError:
4158 return self._na_value
4159 else:
4160 if isinstance(loc, slice):
4161 loc = loc.indices(len(self))[-1]
4162 return self[loc]
4164 def asof_locs(self, where, mask):
4165 """
4166 Find the locations (indices) of the labels from the index for
4167 every entry in the `where` argument.
4169 As in the `asof` function, if the label (a particular entry in
4170 `where`) is not in the index, the latest index label up to the
4171 passed label is chosen and its index returned.
4173 If all of the labels in the index are later than a label in `where`,
4174 -1 is returned.
4176 `mask` is used to ignore NA values in the index during calculation.
4178 Parameters
4179 ----------
4180 where : Index
4181 An Index consisting of an array of timestamps.
4182 mask : array-like
4183 Array of booleans denoting where values in the original
4184 data are not NA.
4186 Returns
4187 -------
4188 numpy.ndarray
4189 An array of locations (indices) of the labels from the Index
4190 which correspond to the return values of the `asof` function
4191 for every element in `where`.
4192 """
4193 locs = self.values[mask].searchsorted(where.values, side="right")
4194 locs = np.where(locs > 0, locs - 1, 0)
4196 result = np.arange(len(self))[mask].take(locs)
4198 first = mask.argmax()
4199 result[(locs == 0) & (where.values < self.values[first])] = -1
4201 return result
4203 def sort_values(self, return_indexer=False, ascending=True):
4204 """
4205 Return a sorted copy of the index.
4207 Return a sorted copy of the index, and optionally return the indices
4208 that sorted the index itself.
4210 Parameters
4211 ----------
4212 return_indexer : bool, default False
4213 Should the indices that would sort the index be returned.
4214 ascending : bool, default True
4215 Should the index values be sorted in an ascending order.
4217 Returns
4218 -------
4219 sorted_index : pandas.Index
4220 Sorted copy of the index.
4221 indexer : numpy.ndarray, optional
4222 The indices that the index itself was sorted by.
4224 See Also
4225 --------
4226 Series.sort_values : Sort values of a Series.
4227 DataFrame.sort_values : Sort values in a DataFrame.
4229 Examples
4230 --------
4231 >>> idx = pd.Index([10, 100, 1, 1000])
4232 >>> idx
4233 Int64Index([10, 100, 1, 1000], dtype='int64')
4235 Sort values in ascending order (default behavior).
4237 >>> idx.sort_values()
4238 Int64Index([1, 10, 100, 1000], dtype='int64')
4240 Sort values in descending order, and also get the indices `idx` was
4241 sorted by.
4243 >>> idx.sort_values(ascending=False, return_indexer=True)
4244 (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
4245 """
4246 _as = self.argsort()
4247 if not ascending:
4248 _as = _as[::-1]
4250 sorted_index = self.take(_as)
4252 if return_indexer:
4253 return sorted_index, _as
4254 else:
4255 return sorted_index
4257 def sort(self, *args, **kwargs):
4258 """
4259 Use sort_values instead.
4260 """
4261 raise TypeError("cannot sort an Index object in-place, use sort_values instead")
4263 def shift(self, periods=1, freq=None):
4264 """
4265 Shift index by desired number of time frequency increments.
4267 This method is for shifting the values of datetime-like indexes
4268 by a specified time increment a given number of times.
4270 Parameters
4271 ----------
4272 periods : int, default 1
4273 Number of periods (or increments) to shift by,
4274 can be positive or negative.
4275 freq : pandas.DateOffset, pandas.Timedelta or str, optional
4276 Frequency increment to shift by.
4277 If None, the index is shifted by its own `freq` attribute.
4278 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
4280 Returns
4281 -------
4282 pandas.Index
4283 Shifted index.
4285 See Also
4286 --------
4287 Series.shift : Shift values of Series.
4289 Notes
4290 -----
4291 This method is only implemented for datetime-like index classes,
4292 i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.
4294 Examples
4295 --------
4296 Put the first 5 month starts of 2011 into an index.
4298 >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
4299 >>> month_starts
4300 DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
4301 '2011-05-01'],
4302 dtype='datetime64[ns]', freq='MS')
4304 Shift the index by 10 days.
4306 >>> month_starts.shift(10, freq='D')
4307 DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
4308 '2011-05-11'],
4309 dtype='datetime64[ns]', freq=None)
4311 The default value of `freq` is the `freq` attribute of the index,
4312 which is 'MS' (month start) in this example.
4314 >>> month_starts.shift(10)
4315 DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
4316 '2012-03-01'],
4317 dtype='datetime64[ns]', freq='MS')
4318 """
4319 raise NotImplementedError(f"Not supported for type {type(self).__name__}")
4321 def argsort(self, *args, **kwargs):
4322 """
4323 Return the integer indices that would sort the index.
4325 Parameters
4326 ----------
4327 *args
4328 Passed to `numpy.ndarray.argsort`.
4329 **kwargs
4330 Passed to `numpy.ndarray.argsort`.
4332 Returns
4333 -------
4334 numpy.ndarray
4335 Integer indices that would sort the index if used as
4336 an indexer.
4338 See Also
4339 --------
4340 numpy.argsort : Similar method for NumPy arrays.
4341 Index.sort_values : Return sorted copy of Index.
4343 Examples
4344 --------
4345 >>> idx = pd.Index(['b', 'a', 'd', 'c'])
4346 >>> idx
4347 Index(['b', 'a', 'd', 'c'], dtype='object')
4349 >>> order = idx.argsort()
4350 >>> order
4351 array([1, 0, 3, 2])
4353 >>> idx[order]
4354 Index(['a', 'b', 'c', 'd'], dtype='object')
4355 """
4356 result = self.asi8
4357 if result is None:
4358 result = np.array(self)
4359 return result.argsort(*args, **kwargs)
    # Shared docstring for ``get_value``; interpolated with
    # _index_doc_kwargs via @Appender on the method below.
    _index_shared_docs[
        "get_value"
    ] = """
        Fast lookup of value from 1-dimensional ndarray. Only use this if you
        know what you're doing.

        Returns
        -------
        scalar
            A value in the Series with the index of the key value in self.
        """
    @Appender(_index_shared_docs["get_value"] % _index_doc_kwargs)
    def get_value(self, series, key):

        # if we have something that is Index-like, then
        # use this, e.g. DatetimeIndex
        # Things like `Series._get_value` (via .at) pass the EA directly here.
        s = extract_array(series, extract_numpy=True)
        if isinstance(s, ExtensionArray):
            if is_scalar(key):
                # GH 20882, 21257
                # First try to convert the key to a location
                # If that fails, raise a KeyError if an integer
                # index, otherwise, see if key is an integer, and
                # try that
                try:
                    iloc = self.get_loc(key)
                    return s[iloc]
                except KeyError:
                    if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
                        raise
                    elif is_integer(key):
                        return s[key]
                # NOTE: when neither branch fires we deliberately fall
                # through to the ndarray-based path below
            else:
                # if key is not a scalar, directly raise an error (the code below
                # would convert to numpy arrays and raise later any way) - GH29926
                raise InvalidIndexError(key)

        # ndarray-backed path: look the key up via the engine
        s = com.values_from_object(series)
        k = com.values_from_object(key)

        k = self._convert_scalar_indexer(k, kind="getitem")
        try:
            return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
        except KeyError as e1:
            if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
                raise

            # last resort: treat the key as a positional index
            try:
                return libindex.get_value_at(s, key)
            except IndexError:
                raise
            except TypeError:
                # generator/iterator-like
                if is_iterator(key):
                    raise InvalidIndexError(key)
                else:
                    raise e1
            except Exception:
                raise e1
        except TypeError:
            # e.g. "[False] is an invalid key"
            if is_scalar(key):
                raise IndexError(key)
            raise InvalidIndexError(key)
4428 def set_value(self, arr, key, value):
4429 """
4430 Fast lookup of value from 1-dimensional ndarray.
4432 .. deprecated:: 1.0
4434 Notes
4435 -----
4436 Only use this if you know what you're doing.
4437 """
4438 warnings.warn(
4439 (
4440 "The 'set_value' method is deprecated, and "
4441 "will be removed in a future version."
4442 ),
4443 FutureWarning,
4444 stacklevel=2,
4445 )
4446 self._engine.set_value(
4447 com.values_from_object(arr), com.values_from_object(key), value
4448 )
    # Shared docstring consumed via @Appender by get_indexer_non_unique in
    # Index and its subclasses; %(target_klass)s is filled per-class.
    _index_shared_docs[
        "get_indexer_non_unique"
    ] = """
        Compute indexer and mask for new index given the current index. The
        indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s

        Returns
        -------
        indexer : ndarray of int
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        missing : ndarray of int
            An indexer into the target of the values not found.
            These correspond to the -1 in the indexer array.
        """
4472 @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
4473 def get_indexer_non_unique(self, target):
4474 target = ensure_index(target)
4475 pself, ptarget = self._maybe_promote(target)
4476 if pself is not self or ptarget is not target:
4477 return pself.get_indexer_non_unique(ptarget)
4479 if is_categorical(target):
4480 tgt_values = np.asarray(target)
4481 elif self.is_all_dates and target.is_all_dates: # GH 30399
4482 tgt_values = target.asi8
4483 else:
4484 tgt_values = target._ndarray_values
4486 indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
4487 return ensure_platform_int(indexer), missing
4489 def get_indexer_for(self, target, **kwargs):
4490 """
4491 Guaranteed return of an indexer even when non-unique.
4493 This dispatches to get_indexer or get_indexer_non_unique
4494 as appropriate.
4496 Returns
4497 -------
4498 numpy.ndarray
4499 List of indices.
4500 """
4501 if self.is_unique:
4502 return self.get_indexer(target, **kwargs)
4503 indexer, _ = self.get_indexer_non_unique(target, **kwargs)
4504 return indexer
4506 def _maybe_promote(self, other):
4507 # A hack, but it works
4509 if self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
4510 return type(other)(self), other
4511 elif self.inferred_type == "boolean":
4512 if not is_object_dtype(self.dtype):
4513 return self.astype("object"), other.astype("object")
4514 return self, other
4516 def groupby(self, values) -> Dict[Hashable, np.ndarray]:
4517 """
4518 Group the index labels by a given array of values.
4520 Parameters
4521 ----------
4522 values : array
4523 Values used to determine the groups.
4525 Returns
4526 -------
4527 dict
4528 {group name -> group labels}
4529 """
4531 # TODO: if we are a MultiIndex, we can do better
4532 # that converting to tuples
4533 if isinstance(values, ABCMultiIndex):
4534 values = values.values
4535 values = ensure_categorical(values)
4536 result = values._reverse_indexer()
4538 # map to the label
4539 result = {k: self.take(v) for k, v in result.items()}
4541 return result
4543 def map(self, mapper, na_action=None):
4544 """
4545 Map values using input correspondence (a dict, Series, or function).
4547 Parameters
4548 ----------
4549 mapper : function, dict, or Series
4550 Mapping correspondence.
4551 na_action : {None, 'ignore'}
4552 If 'ignore', propagate NA values, without passing them to the
4553 mapping correspondence.
4555 Returns
4556 -------
4557 applied : Union[Index, MultiIndex], inferred
4558 The output of the mapping function applied to the index.
4559 If the function returns a tuple with more than one element
4560 a MultiIndex will be returned.
4561 """
4563 from pandas.core.indexes.multi import MultiIndex
4565 new_values = super()._map_values(mapper, na_action=na_action)
4567 attributes = self._get_attributes_dict()
4569 # we can return a MultiIndex
4570 if new_values.size and isinstance(new_values[0], tuple):
4571 if isinstance(self, MultiIndex):
4572 names = self.names
4573 elif attributes.get("name"):
4574 names = [attributes.get("name")] * len(new_values[0])
4575 else:
4576 names = None
4577 return MultiIndex.from_tuples(new_values, names=names)
4579 attributes["copy"] = False
4580 if not new_values.size:
4581 # empty
4582 attributes["dtype"] = self.dtype
4584 return Index(new_values, **attributes)
4586 def isin(self, values, level=None):
4587 """
4588 Return a boolean array where the index values are in `values`.
4590 Compute boolean array of whether each index value is found in the
4591 passed set of values. The length of the returned boolean array matches
4592 the length of the index.
4594 Parameters
4595 ----------
4596 values : set or list-like
4597 Sought values.
4598 level : str or int, optional
4599 Name or position of the index level to use (if the index is a
4600 `MultiIndex`).
4602 Returns
4603 -------
4604 is_contained : ndarray
4605 NumPy array of boolean values.
4607 See Also
4608 --------
4609 Series.isin : Same for Series.
4610 DataFrame.isin : Same method for DataFrames.
4612 Notes
4613 -----
4614 In the case of `MultiIndex` you must either specify `values` as a
4615 list-like object containing tuples that are the same length as the
4616 number of levels, or specify `level`. Otherwise it will raise a
4617 ``ValueError``.
4619 If `level` is specified:
4621 - if it is the name of one *and only one* index level, use that level;
4622 - otherwise it should be a number indicating level position.
4624 Examples
4625 --------
4626 >>> idx = pd.Index([1,2,3])
4627 >>> idx
4628 Int64Index([1, 2, 3], dtype='int64')
4630 Check whether each index value in a list of values.
4631 >>> idx.isin([1, 4])
4632 array([ True, False, False])
4634 >>> midx = pd.MultiIndex.from_arrays([[1,2,3],
4635 ... ['red', 'blue', 'green']],
4636 ... names=('number', 'color'))
4637 >>> midx
4638 MultiIndex(levels=[[1, 2, 3], ['blue', 'green', 'red']],
4639 codes=[[0, 1, 2], [2, 0, 1]],
4640 names=['number', 'color'])
4642 Check whether the strings in the 'color' level of the MultiIndex
4643 are in a list of colors.
4645 >>> midx.isin(['red', 'orange', 'yellow'], level='color')
4646 array([ True, False, False])
4648 To check across the levels of a MultiIndex, pass a list of tuples:
4650 >>> midx.isin([(1, 'red'), (3, 'red')])
4651 array([ True, False, False])
4653 For a DatetimeIndex, string values in `values` are converted to
4654 Timestamps.
4656 >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13']
4657 >>> dti = pd.to_datetime(dates)
4658 >>> dti
4659 DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'],
4660 dtype='datetime64[ns]', freq=None)
4662 >>> dti.isin(['2000-03-11'])
4663 array([ True, False, False])
4664 """
4665 if level is not None:
4666 self._validate_index_level(level)
4667 return algos.isin(self, values)
4669 def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
4670 # this is for partial string indexing,
4671 # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex
4672 raise NotImplementedError
4674 def slice_indexer(self, start=None, end=None, step=None, kind=None):
4675 """
4676 For an ordered or unique index, compute the slice indexer for input
4677 labels and step.
4679 Parameters
4680 ----------
4681 start : label, default None
4682 If None, defaults to the beginning.
4683 end : label, default None
4684 If None, defaults to the end.
4685 step : int, default None
4686 kind : str, default None
4688 Returns
4689 -------
4690 indexer : slice
4692 Raises
4693 ------
4694 KeyError : If key does not exist, or key is not unique and index is
4695 not ordered.
4697 Notes
4698 -----
4699 This function assumes that the data is sorted, so use at your own peril
4701 Examples
4702 --------
4703 This is a method on all index types. For example you can do:
4705 >>> idx = pd.Index(list('abcd'))
4706 >>> idx.slice_indexer(start='b', end='c')
4707 slice(1, 3)
4709 >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
4710 >>> idx.slice_indexer(start='b', end=('c', 'g'))
4711 slice(1, 3)
4712 """
4713 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)
4715 # return a slice
4716 if not is_scalar(start_slice):
4717 raise AssertionError("Start slice bound is non-scalar")
4718 if not is_scalar(end_slice):
4719 raise AssertionError("End slice bound is non-scalar")
4721 return slice(start_slice, end_slice, step)
4723 def _maybe_cast_indexer(self, key):
4724 """
4725 If we have a float key and are not a floating index, then try to cast
4726 to an int if equivalent.
4727 """
4729 if is_float(key) and not self.is_floating():
4730 try:
4731 ckey = int(key)
4732 if ckey == key:
4733 key = ckey
4734 except (OverflowError, ValueError, TypeError):
4735 pass
4736 return key
4738 def _validate_indexer(self, form, key, kind):
4739 """
4740 If we are positional indexer, validate that we have appropriate
4741 typed bounds must be an integer.
4742 """
4743 assert kind in ["ix", "loc", "getitem", "iloc"]
4745 if key is None:
4746 pass
4747 elif is_integer(key):
4748 pass
4749 elif kind in ["iloc", "getitem"]:
4750 self._invalid_indexer(form, key)
4751 return key
    # Shared docstring consumed via @Appender by _maybe_cast_slice_bound in
    # Index and its subclasses.
    _index_shared_docs[
        "_maybe_cast_slice_bound"
    ] = """
        This function should be overloaded in subclasses that allow non-trivial
        casting on label-slice bounds, e.g. datetime-like indices allowing
        strings containing formatted datetimes.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'ix', 'loc', 'getitem'}

        Returns
        -------
        label : object

        Notes
        -----
        Value of `side` parameter should be validated in caller.
        """
4775 @Appender(_index_shared_docs["_maybe_cast_slice_bound"])
4776 def _maybe_cast_slice_bound(self, label, side, kind):
4777 assert kind in ["ix", "loc", "getitem", None]
4779 # We are a plain index here (sub-class override this method if they
4780 # wish to have special treatment for floats/ints, e.g. Float64Index and
4781 # datetimelike Indexes
4782 # reject them
4783 if is_float(label):
4784 if not (kind in ["ix"] and (self.holds_integer() or self.is_floating())):
4785 self._invalid_indexer("slice", label)
4787 # we are trying to find integer bounds on a non-integer based index
4788 # this is rejected (generally .loc gets you here)
4789 elif is_integer(label):
4790 self._invalid_indexer("slice", label)
4792 return label
4794 def _searchsorted_monotonic(self, label, side="left"):
4795 if self.is_monotonic_increasing:
4796 return self.searchsorted(label, side=side)
4797 elif self.is_monotonic_decreasing:
4798 # np.searchsorted expects ascending sort order, have to reverse
4799 # everything for it to work (element ordering, search side and
4800 # resulting value).
4801 pos = self[::-1].searchsorted(
4802 label, side="right" if side == "left" else "left"
4803 )
4804 return len(self) - pos
4806 raise ValueError("index must be monotonic increasing or decreasing")
4808 def get_slice_bound(self, label, side, kind):
4809 """
4810 Calculate slice bound that corresponds to given label.
4812 Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
4813 of given label.
4815 Parameters
4816 ----------
4817 label : object
4818 side : {'left', 'right'}
4819 kind : {'ix', 'loc', 'getitem'}
4821 Returns
4822 -------
4823 int
4824 Index of label.
4825 """
4826 assert kind in ["ix", "loc", "getitem", None]
4828 if side not in ("left", "right"):
4829 raise ValueError(
4830 f"Invalid value for side kwarg, must be either"
4831 f" 'left' or 'right': {side}"
4832 )
4834 original_label = label
4836 # For datetime indices label may be a string that has to be converted
4837 # to datetime boundary according to its resolution.
4838 label = self._maybe_cast_slice_bound(label, side, kind)
4840 # we need to look up the label
4841 try:
4842 slc = self.get_loc(label)
4843 except KeyError as err:
4844 try:
4845 return self._searchsorted_monotonic(label, side)
4846 except ValueError:
4847 # raise the original KeyError
4848 raise err
4850 if isinstance(slc, np.ndarray):
4851 # get_loc may return a boolean array or an array of indices, which
4852 # is OK as long as they are representable by a slice.
4853 if is_bool_dtype(slc):
4854 slc = lib.maybe_booleans_to_slice(slc.view("u1"))
4855 else:
4856 slc = lib.maybe_indices_to_slice(slc.astype("i8"), len(self))
4857 if isinstance(slc, np.ndarray):
4858 raise KeyError(
4859 f"Cannot get {side} slice bound for non-unique "
4860 f"label: {repr(original_label)}"
4861 )
4863 if isinstance(slc, slice):
4864 if side == "left":
4865 return slc.start
4866 else:
4867 return slc.stop
4868 else:
4869 if side == "right":
4870 return slc + 1
4871 else:
4872 return slc
    def slice_locs(self, start=None, end=None, step=None, kind=None):
        """
        Compute slice locations for input labels.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning.
        end : label, default None
            If None, defaults to the end.
        step : int, defaults None
            If None, defaults to 1.
        kind : {'ix', 'loc', 'getitem'} or None

        Returns
        -------
        start, end : int

        See Also
        --------
        Index.get_loc : Get location for a single label.

        Notes
        -----
        This method only works if the index is monotonic or unique.

        Examples
        --------
        >>> idx = pd.Index(list('abcd'))
        >>> idx.slice_locs(start='b', end='c')
        (1, 3)
        """
        # inc: are we slicing forwards (step None or non-negative)?
        inc = step is None or step >= 0

        if not inc:
            # If it's a reverse slice, temporarily swap bounds.
            start, end = end, start

        # GH 16785: If start and end happen to be date strings with UTC offsets
        # attempt to parse and check that the offsets are the same
        if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
            try:
                ts_start = Timestamp(start)
                ts_end = Timestamp(end)
            except (ValueError, TypeError):
                # not parseable as timestamps: nothing to validate
                pass
            else:
                if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
                    raise ValueError("Both dates must have the same UTC offset")

        # resolve each bound; a missing/None bound means beginning/end
        start_slice = None
        if start is not None:
            start_slice = self.get_slice_bound(start, "left", kind)
        if start_slice is None:
            start_slice = 0

        end_slice = None
        if end is not None:
            end_slice = self.get_slice_bound(end, "right", kind)
        if end_slice is None:
            end_slice = len(self)

        if not inc:
            # Bounds at this moment are swapped, swap them back and shift by 1.
            #
            # slice_locs('B', 'A', step=-1): s='B', e='A'
            #
            #              s='A'                 e='B'
            # AFTER SWAP:    |                     |
            #                v ------------------> V
            #           -----------------------------------
            #           | | |A|A|A|A| | | | | |B|B| | | | |
            #           -----------------------------------
            #              ^ <------------------ ^
            # SHOULD BE:   |                     |
            #           end=s-1              start=e-1
            #
            end_slice, start_slice = start_slice - 1, end_slice - 1

            # i == -1 triggers ``len(self) + i`` selection that points to the
            # last element, not before-the-first one, subtracting len(self)
            # compensates that.
            if end_slice == -1:
                end_slice -= len(self)
            if start_slice == -1:
                start_slice -= len(self)

        return start_slice, end_slice
4963 def delete(self, loc):
4964 """
4965 Make new Index with passed location(-s) deleted.
4967 Returns
4968 -------
4969 new_index : Index
4970 """
4971 return self._shallow_copy(np.delete(self._data, loc))
4973 def insert(self, loc, item):
4974 """
4975 Make new Index inserting new item at location.
4977 Follows Python list.append semantics for negative values.
4979 Parameters
4980 ----------
4981 loc : int
4982 item : object
4984 Returns
4985 -------
4986 new_index : Index
4987 """
4988 _self = np.asarray(self)
4989 item = self._coerce_scalar_to_index(item)._ndarray_values
4990 idx = np.concatenate((_self[:loc], item, _self[loc:]))
4991 return self._shallow_copy_with_infer(idx)
4993 def drop(self, labels, errors="raise"):
4994 """
4995 Make new Index with passed list of labels deleted.
4997 Parameters
4998 ----------
4999 labels : array-like
5000 errors : {'ignore', 'raise'}, default 'raise'
5001 If 'ignore', suppress error and existing labels are dropped.
5003 Returns
5004 -------
5005 dropped : Index
5007 Raises
5008 ------
5009 KeyError
5010 If not all of the labels are found in the selected axis
5011 """
5012 arr_dtype = "object" if self.dtype == "object" else None
5013 labels = com.index_labels_to_array(labels, dtype=arr_dtype)
5014 indexer = self.get_indexer(labels)
5015 mask = indexer == -1
5016 if mask.any():
5017 if errors != "ignore":
5018 raise KeyError(f"{labels[mask]} not found in axis")
5019 indexer = indexer[~mask]
5020 return self.delete(indexer)
5022 # --------------------------------------------------------------------
5023 # Generated Arithmetic, Comparison, and Unary Methods
5025 @classmethod
5026 def _add_comparison_methods(cls):
5027 """
5028 Add in comparison methods.
5029 """
5030 cls.__eq__ = _make_comparison_op(operator.eq, cls)
5031 cls.__ne__ = _make_comparison_op(operator.ne, cls)
5032 cls.__lt__ = _make_comparison_op(operator.lt, cls)
5033 cls.__gt__ = _make_comparison_op(operator.gt, cls)
5034 cls.__le__ = _make_comparison_op(operator.le, cls)
5035 cls.__ge__ = _make_comparison_op(operator.ge, cls)
5037 @classmethod
5038 def _add_numeric_methods_add_sub_disabled(cls):
5039 """
5040 Add in the numeric add/sub methods to disable.
5041 """
5042 cls.__add__ = make_invalid_op("__add__")
5043 cls.__radd__ = make_invalid_op("__radd__")
5044 cls.__iadd__ = make_invalid_op("__iadd__")
5045 cls.__sub__ = make_invalid_op("__sub__")
5046 cls.__rsub__ = make_invalid_op("__rsub__")
5047 cls.__isub__ = make_invalid_op("__isub__")
5049 @classmethod
5050 def _add_numeric_methods_disabled(cls):
5051 """
5052 Add in numeric methods to disable other than add/sub.
5053 """
5054 cls.__pow__ = make_invalid_op("__pow__")
5055 cls.__rpow__ = make_invalid_op("__rpow__")
5056 cls.__mul__ = make_invalid_op("__mul__")
5057 cls.__rmul__ = make_invalid_op("__rmul__")
5058 cls.__floordiv__ = make_invalid_op("__floordiv__")
5059 cls.__rfloordiv__ = make_invalid_op("__rfloordiv__")
5060 cls.__truediv__ = make_invalid_op("__truediv__")
5061 cls.__rtruediv__ = make_invalid_op("__rtruediv__")
5062 cls.__mod__ = make_invalid_op("__mod__")
5063 cls.__divmod__ = make_invalid_op("__divmod__")
5064 cls.__neg__ = make_invalid_op("__neg__")
5065 cls.__pos__ = make_invalid_op("__pos__")
5066 cls.__abs__ = make_invalid_op("__abs__")
5067 cls.__inv__ = make_invalid_op("__inv__")
5069 @classmethod
5070 def _add_numeric_methods_binary(cls):
5071 """
5072 Add in numeric methods.
5073 """
5074 cls.__add__ = _make_arithmetic_op(operator.add, cls)
5075 cls.__radd__ = _make_arithmetic_op(ops.radd, cls)
5076 cls.__sub__ = _make_arithmetic_op(operator.sub, cls)
5077 cls.__rsub__ = _make_arithmetic_op(ops.rsub, cls)
5078 cls.__rpow__ = _make_arithmetic_op(ops.rpow, cls)
5079 cls.__pow__ = _make_arithmetic_op(operator.pow, cls)
5081 cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls)
5082 cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls)
5084 # TODO: rmod? rdivmod?
5085 cls.__mod__ = _make_arithmetic_op(operator.mod, cls)
5086 cls.__floordiv__ = _make_arithmetic_op(operator.floordiv, cls)
5087 cls.__rfloordiv__ = _make_arithmetic_op(ops.rfloordiv, cls)
5088 cls.__divmod__ = _make_arithmetic_op(divmod, cls)
5089 cls.__mul__ = _make_arithmetic_op(operator.mul, cls)
5090 cls.__rmul__ = _make_arithmetic_op(ops.rmul, cls)
5092 @classmethod
5093 def _add_numeric_methods_unary(cls):
5094 """
5095 Add in numeric unary methods.
5096 """
5098 def _make_evaluate_unary(op, opstr):
5099 def _evaluate_numeric_unary(self):
5101 attrs = self._get_attributes_dict()
5102 return Index(op(self.values), **attrs)
5104 _evaluate_numeric_unary.__name__ = opstr
5105 return _evaluate_numeric_unary
5107 cls.__neg__ = _make_evaluate_unary(operator.neg, "__neg__")
5108 cls.__pos__ = _make_evaluate_unary(operator.pos, "__pos__")
5109 cls.__abs__ = _make_evaluate_unary(np.abs, "__abs__")
5110 cls.__inv__ = _make_evaluate_unary(lambda x: -x, "__inv__")
    @classmethod
    def _add_numeric_methods(cls):
        # Install both the unary (neg/pos/abs/inv) and binary (+, -, *, ...)
        # arithmetic methods on the class.
        cls._add_numeric_methods_unary()
        cls._add_numeric_methods_binary()
    @classmethod
    def _add_logical_methods(cls):
        """
        Add in logical methods.
        """
        # Template shared by the generated ``all``/``any`` methods;
        # %(outname)s and %(desc)s are filled in via Substitution below.
        _doc = """
        %(desc)s

        Parameters
        ----------
        *args
            These parameters will be passed to numpy.%(outname)s.
        **kwargs
            These parameters will be passed to numpy.%(outname)s.

        Returns
        -------
        %(outname)s : bool or array_like (if axis is specified)
            A single element array_like may be converted to bool."""

        _index_shared_docs["index_all"] = dedent(
            """

        See Also
        --------
        Index.any : Return whether any element in an Index is True.
        Series.any : Return whether any element in a Series is True.
        Series.all : Return whether all elements in a Series are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        **all**

        True, because nonzero integers are considered True.

        >>> pd.Index([1, 2, 3]).all()
        True

        False, because ``0`` is considered False.

        >>> pd.Index([0, 1, 2]).all()
        False

        **any**

        True, because ``1`` is considered True.

        >>> pd.Index([0, 0, 1]).any()
        True

        False, because ``0`` is considered False.

        >>> pd.Index([0, 0, 0]).any()
        False
        """
        )

        _index_shared_docs["index_any"] = dedent(
            """

        See Also
        --------
        Index.all : Return whether all elements are True.
        Series.all : Return whether all elements are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        >>> index = pd.Index([0, 1, 2])
        >>> index.any()
        True

        >>> index = pd.Index([0, 0, 0])
        >>> index.any()
        False
        """
        )

        def _make_logical_function(name, desc, f):
            # Build an ``all``/``any`` method wrapping the numpy reducer ``f``.
            @Substitution(outname=name, desc=desc)
            @Appender(_index_shared_docs["index_" + name])
            @Appender(_doc)
            def logical_func(self, *args, **kwargs):
                result = f(self.values)
                if (
                    isinstance(result, (np.ndarray, ABCSeries, Index))
                    and result.ndim == 0
                ):
                    # return NumPy type
                    return result.dtype.type(result.item())
                else:  # pragma: no cover
                    return result

            logical_func.__name__ = name
            return logical_func

        cls.all = _make_logical_function(
            "all", "Return whether all elements are True.", np.all
        )
        cls.any = _make_logical_function(
            "any", "Return whether any element is True.", np.any
        )
5229 @classmethod
5230 def _add_logical_methods_disabled(cls):
5231 """
5232 Add in logical methods to disable.
5233 """
5234 cls.all = make_invalid_op("all")
5235 cls.any = make_invalid_op("any")
    @property
    def shape(self):
        """
        Return a tuple of the shape of the underlying data.
        """
        # not using "(len(self), )" to return "correct" shape if the values
        # consists of a >1 D array (see GH-27775)
        # overridden in MultiIndex.shape to avoid materializing the values
        return self._values.shape
# Base Index disables arithmetic but enables logical reductions and
# comparisons; numeric subclasses re-enable arithmetic themselves.
Index._add_numeric_methods_disabled()
Index._add_logical_methods()
Index._add_comparison_methods()
def ensure_index_from_sequences(sequences, names=None):
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Many sequences returns a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=['name'])
    Int64Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([['a', 'a'], ['a', 'b']],
                                    names=['L1', 'L2'])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)

    # single sequence -> flat Index; unwrap the lone name when given
    name = names[0] if names is not None else None
    return Index(sequences[0], name=name)
def ensure_index(index_like, copy=False):
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool
        Whether to copy the input when it is already index-like.

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               dtype='object')

    See Also
    --------
    ensure_index_from_sequences
    """
    if isinstance(index_like, Index):
        return index_like.copy() if copy else index_like

    if hasattr(index_like, "name"):
        # e.g. a Series: carry its name over to the new Index
        return Index(index_like, name=index_like.name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    if isinstance(index_like, list):
        # clean_index_list requires exactly ``list``, not a subclass
        if type(index_like) != list:
            index_like = list(index_like)

        converted, all_arrays = lib.clean_index_list(index_like)

        if len(converted) > 0 and all_arrays:
            # a non-empty list of equal-length arrays -> MultiIndex
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex.from_arrays(converted)
        index_like = converted
    elif copy:
        # clean_index_list does the equivalent of copying, so an explicit
        # shallow copy is only needed for non-list inputs
        from copy import copy as copy_func

        index_like = copy_func(index_like)

    return Index(index_like)
5361def _ensure_has_len(seq):
5362 """
5363 If seq is an iterator, put its values into a list.
5364 """
5365 try:
5366 len(seq)
5367 except TypeError:
5368 return list(seq)
5369 else:
5370 return seq
5373def _trim_front(strings):
5374 """
5375 Trims zeros and decimal points.
5376 """
5377 trimmed = strings
5378 while len(strings) > 0 and all(x[0] == " " for x in trimmed):
5379 trimmed = [x[1:] for x in trimmed]
5380 return trimmed
5383def _validate_join_method(method):
5384 if method not in ["left", "right", "inner", "outer"]:
5385 raise ValueError(f"do not recognize join method {method}")
def default_index(n):
    """
    Return the default index for a new object: an unnamed RangeIndex(0, n).
    """
    from pandas.core.indexes.range import RangeIndex

    return RangeIndex(start=0, stop=n, name=None)
def maybe_extract_name(name, obj, cls) -> Optional[Hashable]:
    """
    If no name is passed, then extract it from data, validating hashability.
    """
    if name is None and isinstance(obj, (Index, ABCSeries)):
        # Only Index/Series carry a usable .name; checking a bare "name"
        # attribute would also match e.g. dtype.name.
        name = obj.name

    # GH#29069: index names must be hashable
    if is_hashable(name):
        return name
    raise TypeError(f"{cls.__name__}.name must be a hashable type")
def _maybe_cast_with_dtype(data: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
    """
    If a dtype is passed, cast to the closest matching dtype that is supported
    by Index.

    Parameters
    ----------
    data : np.ndarray
    dtype : np.dtype
    copy : bool

    Returns
    -------
    np.ndarray

    Raises
    ------
    ValueError
        If an integer dtype is requested but the data contain NaN.
    """
    # we need to avoid having numpy coerce
    # things that look like ints/floats to ints unless
    # they are actually ints, e.g. '0' and 0.0
    # should not be coerced
    # GH 11836
    if is_integer_dtype(dtype):
        inferred = lib.infer_dtype(data, skipna=False)
        if inferred == "integer":
            # genuine integers: cast (raises if values don't fit the dtype)
            data = maybe_cast_to_integer_array(data, dtype, copy=copy)
        elif inferred in ["floating", "mixed-integer-float"]:
            if isna(data).any():
                raise ValueError("cannot convert float NaN to integer")

            if inferred == "mixed-integer-float":
                data = maybe_cast_to_integer_array(data, dtype)

            # If we are actually all equal to integers,
            # then coerce to integer.
            try:
                data = _try_convert_to_int_array(data, copy, dtype)
            except ValueError:
                # not losslessly representable as ints: fall back to float64
                data = np.array(data, dtype=np.float64, copy=copy)

        elif inferred == "string":
            # leave strings untouched rather than coercing to integers
            pass
        else:
            data = data.astype(dtype)
    elif is_float_dtype(dtype):
        inferred = lib.infer_dtype(data, skipna=False)
        if inferred == "string":
            # leave strings untouched rather than coercing to floats
            pass
        else:
            data = data.astype(dtype)
    else:
        # non-numeric dtype request: let numpy handle the conversion
        data = np.array(data, dtype=dtype, copy=copy)

    return data
def _maybe_cast_data_without_dtype(subarr):
    """
    If we have an arraylike input but no passed dtype, try to infer
    a supported dtype.

    Parameters
    ----------
    subarr : np.ndarray, Index, or Series

    Returns
    -------
    converted : np.ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
    """
    # Runtime import needed bc IntervalArray imports Index
    from pandas.core.arrays import (
        IntervalArray,
        PeriodArray,
        DatetimeArray,
        TimedeltaArray,
    )

    inferred = lib.infer_dtype(subarr, skipna=False)

    if inferred == "integer":
        try:
            data = _try_convert_to_int_array(subarr, False, None)
            return data, data.dtype
        except ValueError:
            # not losslessly representable as int64/uint64 (e.g. overflow)
            pass

        return subarr, object

    elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
        # TODO: Returns IntegerArray for integer-na case in the future
        return subarr, np.float64

    elif inferred == "interval":
        try:
            data = IntervalArray._from_sequence(subarr, copy=False)
            return data, data.dtype
        except ValueError:
            # GH27172: mixed closed Intervals --> object dtype
            pass
    elif inferred == "boolean":
        # don't support boolean explicitly ATM
        pass
    elif inferred != "string":
        if inferred.startswith("datetime"):
            try:
                data = DatetimeArray._from_sequence(subarr, copy=False)
                return data, data.dtype
            except (ValueError, OutOfBoundsDatetime):
                # GH 27011
                # If we have mixed timezones, just send it
                # down the base constructor
                pass

        elif inferred.startswith("timedelta"):
            data = TimedeltaArray._from_sequence(subarr, copy=False)
            return data, data.dtype
        elif inferred == "period":
            try:
                data = PeriodArray._from_sequence(subarr)
                return data, data.dtype
            except IncompatibleFrequency:
                # mixed frequencies: fall through to object handling
                pass

    # no specialized conversion applied: keep the input as-is
    return subarr, subarr.dtype
5535def _try_convert_to_int_array(
5536 data: np.ndarray, copy: bool, dtype: np.dtype
5537) -> np.ndarray:
5538 """
5539 Attempt to convert an array of data into an integer array.
5541 Parameters
5542 ----------
5543 data : The data to convert.
5544 copy : bool
5545 Whether to copy the data or not.
5546 dtype : np.dtype
5548 Returns
5549 -------
5550 int_array : data converted to either an ndarray[int64] or ndarray[uint64]
5552 Raises
5553 ------
5554 ValueError if the conversion was not successful.
5555 """
5557 if not is_unsigned_integer_dtype(dtype):
5558 # skip int64 conversion attempt if uint-like dtype is passed, as
5559 # this could return Int64Index when UInt64Index is what's desired
5560 try:
5561 res = data.astype("i8", copy=False)
5562 if (res == data).all():
5563 return res # TODO: might still need to copy
5564 except (OverflowError, TypeError, ValueError):
5565 pass
5567 # Conversion to int64 failed (possibly due to overflow) or was skipped,
5568 # so let's try now with uint64.
5569 try:
5570 res = data.astype("u8", copy=False)
5571 if (res == data).all():
5572 return res # TODO: might still need to copy
5573 except (OverflowError, TypeError, ValueError):
5574 pass
5576 raise ValueError