Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/indexes/multi.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import datetime
2from sys import getsizeof
3from typing import Hashable, List, Optional, Sequence, Union
4import warnings
6import numpy as np
8from pandas._config import get_option
10from pandas._libs import Timestamp, algos as libalgos, index as libindex, lib, tslibs
11from pandas._libs.hashtable import duplicated_int64
12from pandas.compat.numpy import function as nv
13from pandas.errors import PerformanceWarning, UnsortedIndexError
14from pandas.util._decorators import Appender, cache_readonly
16from pandas.core.dtypes.cast import coerce_indexer_dtype
17from pandas.core.dtypes.common import (
18 ensure_int64,
19 ensure_platform_int,
20 is_categorical_dtype,
21 is_hashable,
22 is_integer,
23 is_iterator,
24 is_list_like,
25 is_object_dtype,
26 is_scalar,
27 pandas_dtype,
28)
29from pandas.core.dtypes.dtypes import ExtensionDtype
30from pandas.core.dtypes.generic import ABCDataFrame
31from pandas.core.dtypes.missing import array_equivalent, isna
33import pandas.core.algorithms as algos
34from pandas.core.arrays import Categorical
35from pandas.core.arrays.categorical import factorize_from_iterables
36import pandas.core.common as com
37import pandas.core.indexes.base as ibase
38from pandas.core.indexes.base import (
39 Index,
40 InvalidIndexError,
41 _index_shared_docs,
42 ensure_index,
43)
44from pandas.core.indexes.frozen import FrozenList
45import pandas.core.missing as missing
46from pandas.core.sorting import (
47 get_group_index,
48 indexer_from_factorized,
49 lexsort_indexer,
50)
51from pandas.core.util.hashing import hash_tuple, hash_tuples
53from pandas.io.formats.printing import (
54 format_object_attrs,
55 format_object_summary,
56 pprint_thing,
57)
59_index_doc_kwargs = dict(ibase._index_doc_kwargs)
60_index_doc_kwargs.update(
61 dict(klass="MultiIndex", target_klass="MultiIndex or list of tuples")
62)
class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    This class manages a MultiIndex by mapping label combinations to positive
    integers.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one uint64 (each), in a strictly
        monotonic way (i.e. respecting the lexicographic order of integer
        combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Move each level's codes into its own pre-calculated bit range.
        # NOTE: deliberately in-place, matching the historical behavior.
        codes <<= self.offsets

        # The shifted bit ranges are disjoint, so OR-ing them composes the
        # levels into a single positive integer.  Reducing along the last
        # axis handles both a single 1-D key (scalar result) and a 2-D
        # batch of keys (one integer per row).
        return np.bitwise_or.reduce(codes, axis=codes.ndim - 1)
class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    This class manages those (extreme) cases in which the number of possible
    label combinations overflows the 64 bits integers, and uses an ObjectEngine
    containing Python integers.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one Python integer (each), in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # The shift can overflow uint64, so switch to Python integers
        # (object dtype) before moving each level into its bit range.
        shifted = codes.astype("object") << self.offsets

        # Bit ranges are disjoint, so OR composes them into one integer per
        # combination; reducing along the last axis covers both a single
        # 1-D key and a 2-D batch of keys.
        return np.bitwise_or.reduce(shifted, axis=shifted.ndim - 1)
class MultiIndex(Index):
    """
    A multi-level, or hierarchical, index object for pandas objects.

    Parameters
    ----------
    levels : sequence of arrays
        The unique labels for each level.
    codes : sequence of arrays
        Integers for each level designating which label at each location.

        .. versionadded:: 0.24.0
    sortorder : optional int
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : optional sequence of objects
        Names for each of the index levels. (name is accepted for compat).
    copy : bool, default False
        Copy the meta-data.
    verify_integrity : bool, default True
        Check that the levels/codes are consistent and valid.

    Attributes
    ----------
    names
    levels
    codes
    nlevels
    levshape

    Methods
    -------
    from_arrays
    from_tuples
    from_product
    from_frame
    set_levels
    set_codes
    to_frame
    to_flat_index
    is_lexsorted
    sortlevel
    droplevel
    swaplevel
    reorder_levels
    remove_unused_levels
    get_locs

    See Also
    --------
    MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
    MultiIndex.from_product : Create a MultiIndex from the cartesian product
        of iterables.
    MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
    MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
    Index : The base pandas Index type.

    Notes
    -----
    See the `user guide
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`_
    for more.

    Examples
    --------
    A new ``MultiIndex`` is typically constructed using one of the helper
    methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
    and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):

    >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
    >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
    MultiIndex([(1, 'red'),
                (1, 'blue'),
                (2, 'red'),
                (2, 'blue')],
               names=['number', 'color'])

    See further examples for how to construct a MultiIndex in the doc strings
    of the mentioned helper methods.
    """

    _deprecations = Index._deprecations | frozenset()

    # initialize to zero-length tuples to make everything work
    _typ = "multiindex"
    _names = FrozenList()
    _levels = FrozenList()
    _codes = FrozenList()
    _comparables = ["names"]
    # ``rename`` is an alias for ``set_names`` on MultiIndex
    rename = Index.set_names

    # --------------------------------------------------------------------
    # Constructors
    def __new__(
        cls,
        levels=None,
        codes=None,
        sortorder=None,
        names=None,
        dtype=None,
        copy=False,
        name=None,
        verify_integrity: bool = True,
        _set_identity: bool = True,
    ):
        # compat with Index: a singular ``name`` is accepted as ``names``
        if name is not None:
            names = name
        if levels is None or codes is None:
            raise TypeError("Must pass both levels and codes")
        if len(levels) != len(codes):
            raise ValueError("Length of levels and codes must be the same.")
        if len(levels) == 0:
            raise ValueError("Must pass non-zero number of levels/codes")

        result = object.__new__(MultiIndex)

        # we've already validated levels and codes, so shortcut here
        result._set_levels(levels, copy=copy, validate=False)
        result._set_codes(codes, copy=copy, validate=False)

        # names must exist (as Nones) before _set_names validates/assigns
        result._names = [None] * len(levels)
        if names is not None:
            # handles name validation
            result._set_names(names)

        if sortorder is not None:
            result.sortorder = int(sortorder)
        else:
            result.sortorder = sortorder

        if verify_integrity:
            # may rewrite codes (e.g. -1 for codes pointing at NaN levels)
            new_codes = result._verify_integrity()
            result._codes = new_codes

        if _set_identity:
            result._reset_identity()

        return result
288 def _validate_codes(self, level: List, code: List):
289 """
290 Reassign code values as -1 if their corresponding levels are NaN.
292 Parameters
293 ----------
294 code : list
295 Code to reassign.
296 level : list
297 Level to check for missing values (NaN, NaT, None).
299 Returns
300 -------
301 new code where code value = -1 if it corresponds
302 to a level with missing values (NaN, NaT, None).
303 """
304 null_mask = isna(level)
305 if np.any(null_mask):
306 code = np.where(null_mask[code], -1, code)
307 return code
    def _verify_integrity(
        self, codes: Optional[List] = None, levels: Optional[List] = None
    ):
        """
        Parameters
        ----------
        codes : optional list
            Codes to check for validity. Defaults to current codes.
        levels : optional list
            Levels to check for validity. Defaults to current levels.

        Raises
        ------
        ValueError
            If length of levels and codes don't match, if the codes for any
            level would exceed level bounds, or there are any duplicate levels.

        Returns
        -------
        new codes where code value = -1 if it corresponds to a
        NaN level.
        """
        # NOTE: Currently does not check, among other things, that cached
        # nlevels matches nor that sortorder matches actually sortorder.
        # Falsy (empty/None) arguments fall back to the current attributes.
        codes = codes or self.codes
        levels = levels or self.levels

        if len(levels) != len(codes):
            raise ValueError(
                "Length of levels and codes must match. NOTE: "
                "this index is in an inconsistent state."
            )
        # every level's codes must have the same length as the first level's
        codes_length = len(codes[0])
        for i, (level, level_codes) in enumerate(zip(levels, codes)):
            if len(level_codes) != codes_length:
                raise ValueError(
                    f"Unequal code lengths: {[len(code_) for code_ in codes]}"
                )
            # codes must index into the level (-1 is the NA sentinel)
            if len(level_codes) and level_codes.max() >= len(level):
                raise ValueError(
                    f"On level {i}, code max ({level_codes.max()}) >= length of "
                    f"level ({len(level)}). NOTE: this index is in an "
                    "inconsistent state"
                )
            if len(level_codes) and level_codes.min() < -1:
                raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1")
            if not level.is_unique:
                raise ValueError(
                    f"Level values must be unique: {list(level)} on level {i}"
                )
        if self.sortorder is not None:
            # a claimed sortorder can't exceed the depth actually sorted
            if self.sortorder > self._lexsort_depth():
                raise ValueError(
                    "Value for sortorder must be inferior or equal to actual "
                    f"lexsort_depth: sortorder {self.sortorder} "
                    f"with lexsort_depth {self._lexsort_depth()}"
                )

        # remap any code pointing at a NaN level value to -1
        codes = [
            self._validate_codes(level, code) for level, code in zip(levels, codes)
        ]
        new_codes = FrozenList(codes)
        return new_codes
373 @classmethod
374 def from_arrays(cls, arrays, sortorder=None, names=lib.no_default):
375 """
376 Convert arrays to MultiIndex.
378 Parameters
379 ----------
380 arrays : list / sequence of array-likes
381 Each array-like gives one level's value for each data point.
382 len(arrays) is the number of levels.
383 sortorder : int or None
384 Level of sortedness (must be lexicographically sorted by that
385 level).
386 names : list / sequence of str, optional
387 Names for the levels in the index.
389 Returns
390 -------
391 MultiIndex
393 See Also
394 --------
395 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
396 MultiIndex.from_product : Make a MultiIndex from cartesian product
397 of iterables.
398 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
400 Examples
401 --------
402 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
403 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
404 MultiIndex([(1, 'red'),
405 (1, 'blue'),
406 (2, 'red'),
407 (2, 'blue')],
408 names=['number', 'color'])
409 """
410 error_msg = "Input must be a list / sequence of array-likes."
411 if not is_list_like(arrays):
412 raise TypeError(error_msg)
413 elif is_iterator(arrays):
414 arrays = list(arrays)
416 # Check if elements of array are list-like
417 for array in arrays:
418 if not is_list_like(array):
419 raise TypeError(error_msg)
421 # Check if lengths of all arrays are equal or not,
422 # raise ValueError, if not
423 for i in range(1, len(arrays)):
424 if len(arrays[i]) != len(arrays[i - 1]):
425 raise ValueError("all arrays must be same length")
427 codes, levels = factorize_from_iterables(arrays)
428 if names is lib.no_default:
429 names = [getattr(arr, "name", None) for arr in arrays]
431 return MultiIndex(
432 levels=levels,
433 codes=codes,
434 sortorder=sortorder,
435 names=names,
436 verify_integrity=False,
437 )
    @classmethod
    def from_tuples(cls, tuples, sortorder=None, names=None):
        """
        Convert list of tuples to MultiIndex.

        Parameters
        ----------
        tuples : list / sequence of tuple-likes
            Each tuple is the index of one row/column.
        sortorder : int or None
            Level of sortedness (must be lexicographically sorted by that
            level).
        names : list / sequence of str, optional
            Names for the levels in the index.

        Returns
        -------
        MultiIndex

        See Also
        --------
        MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
        MultiIndex.from_product : Make a MultiIndex from cartesian product
            of iterables.
        MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

        Examples
        --------
        >>> tuples = [(1, 'red'), (1, 'blue'),
        ...           (2, 'red'), (2, 'blue')]
        >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
        MultiIndex([(1, 'red'),
                    (1, 'blue'),
                    (2, 'red'),
                    (2, 'blue')],
                   names=['number', 'color'])
        """
        if not is_list_like(tuples):
            raise TypeError("Input must be a list / sequence of tuple-likes.")
        elif is_iterator(tuples):
            tuples = list(tuples)

        if len(tuples) == 0:
            if names is None:
                raise TypeError("Cannot infer number of levels from empty list")
            # with no data, the number of levels is given by the names
            arrays = [[]] * len(names)
        elif isinstance(tuples, (np.ndarray, Index)):
            if isinstance(tuples, Index):
                tuples = tuples._values

            # fast C path: transpose tuples into per-level object arrays
            arrays = list(lib.tuples_to_object_array(tuples).T)
        elif isinstance(tuples, list):
            arrays = list(lib.to_object_array_tuples(tuples).T)
        else:
            # generic fallback: transpose via zip
            arrays = zip(*tuples)

        return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names)
    @classmethod
    def from_product(cls, iterables, sortorder=None, names=lib.no_default):
        """
        Make a MultiIndex from the cartesian product of multiple iterables.

        Parameters
        ----------
        iterables : list / sequence of iterables
            Each iterable has unique labels for each level of the index.
        sortorder : int or None
            Level of sortedness (must be lexicographically sorted by that
            level).
        names : list / sequence of str, optional
            Names for the levels in the index.

            .. versionchanged:: 1.0.0

               If not explicitly provided, names will be inferred from the
               elements of iterables if an element has a name attribute

        Returns
        -------
        MultiIndex

        See Also
        --------
        MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
        MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
        MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

        Examples
        --------
        >>> numbers = [0, 1, 2]
        >>> colors = ['green', 'purple']
        >>> pd.MultiIndex.from_product([numbers, colors],
        ...                            names=['number', 'color'])
        MultiIndex([(0, 'green'),
                    (0, 'purple'),
                    (1, 'green'),
                    (1, 'purple'),
                    (2, 'green'),
                    (2, 'purple')],
                   names=['number', 'color'])
        """
        # local import to avoid a circular import at module load time
        from pandas.core.reshape.util import cartesian_product

        if not is_list_like(iterables):
            raise TypeError("Input must be a list / sequence of iterables.")
        elif is_iterator(iterables):
            iterables = list(iterables)

        codes, levels = factorize_from_iterables(iterables)
        if names is lib.no_default:
            # infer names from inputs that carry one (e.g. Series/Index)
            names = [getattr(it, "name", None) for it in iterables]

        # expand per-level codes into the full cartesian product
        codes = cartesian_product(codes)
        return MultiIndex(levels, codes, sortorder=sortorder, names=names)
555 @classmethod
556 def from_frame(cls, df, sortorder=None, names=None):
557 """
558 Make a MultiIndex from a DataFrame.
560 .. versionadded:: 0.24.0
562 Parameters
563 ----------
564 df : DataFrame
565 DataFrame to be converted to MultiIndex.
566 sortorder : int, optional
567 Level of sortedness (must be lexicographically sorted by that
568 level).
569 names : list-like, optional
570 If no names are provided, use the column names, or tuple of column
571 names if the columns is a MultiIndex. If a sequence, overwrite
572 names with the given sequence.
574 Returns
575 -------
576 MultiIndex
577 The MultiIndex representation of the given DataFrame.
579 See Also
580 --------
581 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
582 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
583 MultiIndex.from_product : Make a MultiIndex from cartesian product
584 of iterables.
586 Examples
587 --------
588 >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
589 ... ['NJ', 'Temp'], ['NJ', 'Precip']],
590 ... columns=['a', 'b'])
591 >>> df
592 a b
593 0 HI Temp
594 1 HI Precip
595 2 NJ Temp
596 3 NJ Precip
598 >>> pd.MultiIndex.from_frame(df)
599 MultiIndex([('HI', 'Temp'),
600 ('HI', 'Precip'),
601 ('NJ', 'Temp'),
602 ('NJ', 'Precip')],
603 names=['a', 'b'])
605 Using explicit names, instead of the column names
607 >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
608 MultiIndex([('HI', 'Temp'),
609 ('HI', 'Precip'),
610 ('NJ', 'Temp'),
611 ('NJ', 'Precip')],
612 names=['state', 'observation'])
613 """
614 if not isinstance(df, ABCDataFrame):
615 raise TypeError("Input must be a DataFrame")
617 column_names, columns = zip(*df.items())
618 names = column_names if names is None else names
619 return cls.from_arrays(columns, sortorder=sortorder, names=names)
    # --------------------------------------------------------------------

    @property
    def _values(self):
        # We override here, since our parent uses _data, which we don't use.
        return self.values
    @property
    def shape(self):
        """
        Return a tuple of the shape of the underlying data.
        """
        # overriding the base Index.shape definition to avoid materializing
        # the values (GH-27384, GH-27775)
        return (len(self),)
637 @property
638 def array(self):
639 """
640 Raises a ValueError for `MultiIndex` because there's no single
641 array backing a MultiIndex.
643 Raises
644 ------
645 ValueError
646 """
647 raise ValueError(
648 "MultiIndex has no single backing array. Use "
649 "'MultiIndex.to_numpy()' to get a NumPy array of tuples."
650 )
    # --------------------------------------------------------------------
    # Levels Methods

    @cache_readonly
    def levels(self):
        """The levels of this MultiIndex, as a FrozenList of Index objects."""
        # Use cache_readonly to ensure that self.get_locs doesn't repeatedly
        # create new IndexEngine
        # https://github.com/pandas-dev/pandas/issues/31648
        result = [
            x._shallow_copy(name=name) for x, name in zip(self._levels, self._names)
        ]
        for level in result:
            # disallow midx.levels[0].name = "foo"
            level._no_setting_name = True
        return FrozenList(result)
    def _set_levels(
        self, levels, level=None, copy=False, validate=True, verify_integrity=False
    ):
        # This is NOT part of the levels property because it should be
        # externally not allowed to set levels. User beware if you change
        # _levels directly
        if validate:
            if len(levels) == 0:
                raise ValueError("Must set non-zero number of levels.")
            if level is None and len(levels) != self.nlevels:
                raise ValueError("Length of levels must match number of levels.")
            if level is not None and len(levels) != len(level):
                raise ValueError("Length of levels must match length of level.")

        if level is None:
            # replace all levels
            new_levels = FrozenList(
                ensure_index(lev, copy=copy)._shallow_copy() for lev in levels
            )
        else:
            # replace only the requested levels, keep the rest as-is
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_levels = list(self._levels)
            for lev_num, lev in zip(level_numbers, levels):
                new_levels[lev_num] = ensure_index(lev, copy=copy)._shallow_copy()
            new_levels = FrozenList(new_levels)

        if verify_integrity:
            # may rewrite codes (e.g. -1 for codes pointing at NaN levels)
            new_codes = self._verify_integrity(levels=new_levels)
            self._codes = new_codes

        # re-attach names, which live on the level indexes
        names = self.names
        self._levels = new_levels
        if any(names):
            self._set_names(names)

        # invalidate caches derived from the old levels
        self._tuples = None
        self._reset_cache()
    def set_levels(self, levels, level=None, inplace=False, verify_integrity=True):
        """
        Set new levels on MultiIndex. Defaults to returning new index.

        Parameters
        ----------
        levels : sequence or list of sequence
            New level(s) to apply.
        level : int, level name, or sequence of int/level names (default None)
            Level(s) to set (None for all levels).
        inplace : bool
            If True, mutates in place.
        verify_integrity : bool, default True
            If True, checks that levels and codes are compatible.

        Returns
        -------
        new index (of same type and class...etc)

        Examples
        --------
        >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'),
                                            (2, 'one'), (2, 'two'),
                                            (3, 'one'), (3, 'two')],
                                            names=['foo', 'bar'])
        >>> idx.set_levels([['a', 'b', 'c'], [1, 2]])
        MultiIndex([('a', 1),
                    ('a', 2),
                    ('b', 1),
                    ('b', 2),
                    ('c', 1),
                    ('c', 2)],
                   names=['foo', 'bar'])
        >>> idx.set_levels(['a', 'b', 'c'], level=0)
        MultiIndex([('a', 'one'),
                    ('a', 'two'),
                    ('b', 'one'),
                    ('b', 'two'),
                    ('c', 'one'),
                    ('c', 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_levels(['a', 'b'], level='bar')
        MultiIndex([(1, 'a'),
                    (1, 'b'),
                    (2, 'a'),
                    (2, 'b'),
                    (3, 'a'),
                    (3, 'b')],
                   names=['foo', 'bar'])

        If any of the levels passed to ``set_levels()`` exceeds the
        existing length, all of the values from that argument will
        be stored in the MultiIndex levels, though the values will
        be truncated in the MultiIndex output.

        >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1])
        MultiIndex([('a', 1),
                    ('a', 2),
                    ('b', 1),
                    ('b', 2)],
                   names=['foo', 'bar'])
        >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels
        FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]])
        """
        if is_list_like(levels) and not isinstance(levels, Index):
            levels = list(levels)

        if level is not None and not is_list_like(level):
            # scalar level + flat levels: normalize to list-of-lists form
            if not is_list_like(levels):
                raise TypeError("Levels must be list-like")
            if is_list_like(levels[0]):
                raise TypeError("Levels must be list-like")
            level = [level]
            levels = [levels]
        elif level is None or is_list_like(level):
            if not is_list_like(levels) or not is_list_like(levels[0]):
                raise TypeError("Levels must be list of lists-like")

        if inplace:
            idx = self
        else:
            idx = self._shallow_copy()
            idx._reset_identity()
        idx._set_levels(
            levels, level=level, validate=True, verify_integrity=verify_integrity
        )
        if not inplace:
            return idx
    @property
    def codes(self):
        """The integer codes of this MultiIndex, one array per level."""
        return self._codes
    def _set_codes(
        self, codes, level=None, copy=False, validate=True, verify_integrity=False
    ):
        # Internal setter; external callers should use ``set_codes``.
        if validate:
            if level is None and len(codes) != self.nlevels:
                raise ValueError("Length of codes must match number of levels")
            if level is not None and len(codes) != len(level):
                raise ValueError("Length of codes must match length of levels.")

        if level is None:
            # replace all codes, coercing each to the smallest fitting dtype
            new_codes = FrozenList(
                _coerce_indexer_frozen(level_codes, lev, copy=copy).view()
                for lev, level_codes in zip(self._levels, codes)
            )
        else:
            # replace only the requested levels' codes, keep the rest as-is
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_codes = list(self._codes)
            for lev_num, level_codes in zip(level_numbers, codes):
                lev = self.levels[lev_num]
                new_codes[lev_num] = _coerce_indexer_frozen(level_codes, lev, copy=copy)
            new_codes = FrozenList(new_codes)

        if verify_integrity:
            # may rewrite codes (e.g. -1 for codes pointing at NaN levels)
            new_codes = self._verify_integrity(codes=new_codes)

        self._codes = new_codes

        # invalidate caches derived from the old codes
        self._tuples = None
        self._reset_cache()
    def set_codes(self, codes, level=None, inplace=False, verify_integrity=True):
        """
        Set new codes on MultiIndex. Defaults to returning
        new index.

        .. versionadded:: 0.24.0

           New name for deprecated method `set_labels`.

        Parameters
        ----------
        codes : sequence or list of sequence
            New codes to apply.
        level : int, level name, or sequence of int/level names (default None)
            Level(s) to set (None for all levels).
        inplace : bool
            If True, mutates in place.
        verify_integrity : bool (default True)
            If True, checks that levels and codes are compatible.

        Returns
        -------
        new index (of same type and class...etc)

        Examples
        --------
        >>> idx = pd.MultiIndex.from_tuples([(1, 'one'),
                                            (1, 'two'),
                                            (2, 'one'),
                                            (2, 'two')],
                                            names=['foo', 'bar'])
        >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]])
        MultiIndex([(2, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([1, 0, 1, 0], level=0)
        MultiIndex([(2, 'one'),
                    (1, 'two'),
                    (2, 'one'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([0, 0, 1, 1], level='bar')
        MultiIndex([(1, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (2, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1])
        MultiIndex([(2, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        """
        if level is not None and not is_list_like(level):
            # scalar level + flat codes: normalize to list-of-lists form
            if not is_list_like(codes):
                raise TypeError("Codes must be list-like")
            if is_list_like(codes[0]):
                raise TypeError("Codes must be list-like")
            level = [level]
            codes = [codes]
        elif level is None or is_list_like(level):
            if not is_list_like(codes) or not is_list_like(codes[0]):
                raise TypeError("Codes must be list of lists-like")

        if inplace:
            idx = self
        else:
            idx = self._shallow_copy()
            idx._reset_identity()
        idx._set_codes(codes, level=level, verify_integrity=verify_integrity)
        if not inplace:
            return idx
    def copy(
        self,
        names=None,
        dtype=None,
        levels=None,
        codes=None,
        deep=False,
        _set_identity=False,
        **kwargs,
    ):
        """
        Make a copy of this object. Names, dtype, levels and codes can be
        passed and will be set on new copy.

        Parameters
        ----------
        names : sequence, optional
        dtype : numpy dtype or pandas type, optional
        levels : sequence, optional
        codes : sequence, optional

        Returns
        -------
        copy : MultiIndex

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
        This could be potentially expensive on large MultiIndex objects.
        """
        # NOTE: ``dtype`` is accepted for signature compat but not used here.
        name = kwargs.get("name")
        names = self._validate_names(name=name, names=names, deep=deep)
        if "labels" in kwargs:
            # "labels" was the pre-0.24 name for "codes"
            raise TypeError("'labels' argument has been removed; use 'codes' instead")
        if deep:
            from copy import deepcopy

            if levels is None:
                levels = deepcopy(self.levels)
            if codes is None:
                codes = deepcopy(self.codes)
        else:
            if levels is None:
                levels = self.levels
            if codes is None:
                codes = self.codes
        return MultiIndex(
            levels=levels,
            codes=codes,
            names=names,
            sortorder=self.sortorder,
            verify_integrity=False,
            _set_identity=_set_identity,
        )
    def __array__(self, dtype=None) -> np.ndarray:
        """ the array interface, return my values """
        # NOTE: ``dtype`` is part of the numpy protocol but is ignored here.
        return self.values
964 def view(self, cls=None):
965 """ this is defined as a copy with the same identity """
966 result = self.copy()
967 result._id = self._id
968 return result
    def _shallow_copy_with_infer(self, values, **kwargs):
        # On equal MultiIndexes the difference is empty.
        # Therefore, an empty MultiIndex is returned GH13490
        if len(values) == 0:
            # preserve the number of levels even though there is no data
            return MultiIndex(
                levels=[[] for _ in range(self.nlevels)],
                codes=[[] for _ in range(self.nlevels)],
                **kwargs,
            )
        return self._shallow_copy(values, **kwargs)
981 @Appender(_index_shared_docs["contains"] % _index_doc_kwargs)
982 def __contains__(self, key) -> bool:
983 hash(key)
984 try:
985 self.get_loc(key)
986 return True
987 except (LookupError, TypeError, ValueError):
988 return False
    @Appender(_index_shared_docs["_shallow_copy"])
    def _shallow_copy(self, values=None, **kwargs):
        if values is not None:
            # accept either "names" or the singular "name" alias
            names = kwargs.pop("names", kwargs.pop("name", self.names))
            # discards freq
            kwargs.pop("freq", None)
            return MultiIndex.from_tuples(values, names=names, **kwargs)
        return self.copy(**kwargs)
999 @cache_readonly
1000 def dtype(self) -> np.dtype:
1001 return np.dtype("O")
1003 def _is_memory_usage_qualified(self) -> bool:
1004 """ return a boolean if we need a qualified .info display """
1006 def f(l):
1007 return "mixed" in l or "string" in l or "unicode" in l
1009 return any(f(l) for l in self._inferred_type_levels)
    @Appender(Index.memory_usage.__doc__)
    def memory_usage(self, deep: bool = False) -> int:
        # we are overwriting our base class to avoid
        # computing .values here which could materialize
        # a tuple representation unnecessarily
        return self._nbytes(deep)
    @cache_readonly
    def nbytes(self) -> int:
        """ return the number of bytes in the underlying data """
        return self._nbytes(False)
    def _nbytes(self, deep: bool = False) -> int:
        """
        return the number of bytes in the underlying data
        deeply introspect the level data if deep=True

        include the engine hashtable

        *this is in internal routine*

        """
        # for implementations with no useful getsizeof (PyPy)
        objsize = 24

        # sum the footprint of levels, codes and names separately
        level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
        label_nbytes = sum(i.nbytes for i in self.codes)
        names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
        result = level_nbytes + label_nbytes + names_nbytes

        # include our engine hashtable
        result += self._engine.sizeof(deep=deep)
        return result
1046 # --------------------------------------------------------------------
1047 # Rendering Methods
1048 def _formatter_func(self, tup):
1049 """
1050 Formats each item in tup according to its level's formatter function.
1051 """
1052 formatter_funcs = [level._formatter_func for level in self.levels]
1053 return tuple(func(val) for func, val in zip(formatter_funcs, tup))
    def _format_data(self, name=None):
        """
        Return the formatted data as a unicode string
        """
        # one tuple element per line so levels line up vertically
        return format_object_summary(
            self, self._formatter_func, name=name, line_break_each_value=True
        )
    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).
        """
        # dtype is always object for a MultiIndex, so showing it adds noise
        return format_object_attrs(self, include_dtype=False)
    def _format_native_types(self, na_rep="nan", **kwargs):
        new_levels = []
        new_codes = []

        # go through the levels and format them
        for level, level_codes in zip(self.levels, self.codes):
            level = level._format_native_types(na_rep=na_rep, **kwargs)
            # add nan values, if there are any
            mask = level_codes == -1
            if mask.any():
                # append the NA representation as an extra level value and
                # repoint the masked (-1) codes at it
                nan_index = len(level)
                level = np.append(level, na_rep)
                assert not level_codes.flags.writeable  # i.e. copy is needed
                level_codes = level_codes.copy()  # make writeable
                level_codes[mask] = nan_index
            new_levels.append(level)
            new_codes.append(level_codes)

        if len(new_levels) == 1:
            # a single-level multi-index
            return Index(new_levels[0].take(new_codes[0]))._format_native_types()
        else:
            # reconstruct the multi-index
            mi = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=self.names,
                sortorder=self.sortorder,
                verify_integrity=False,
            )
            return mi.values
def format(
    self,
    space=2,
    sparsify=None,
    adjoin=True,
    names=False,
    na_rep=None,
    formatter=None,
):
    """
    Render the index as formatted strings.

    Parameters
    ----------
    space : int, default 2
        Spacing between adjoined columns.
    sparsify : bool or str, optional
        If None, taken from the ``display.multi_sparse`` option. A
        non-boolean truthy value is used as the sentinel string for
        repeated entries.
    adjoin : bool, default True
        If True, join the per-level columns into one string per row;
        otherwise return one list of strings per level.
    names : bool, default False
        Prepend the level names as a header entry on each level.
    na_rep : str, optional
        Representation for missing values; defaults per level dtype.
    formatter : callable, optional
        Passed through to each level's ``format``.
    """
    if len(self) == 0:
        return []

    stringified_levels = []
    for lev, level_codes in zip(self.levels, self.codes):
        na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type)

        if len(lev) > 0:

            formatted = lev.take(level_codes).format(formatter=formatter)

            # we have some NA
            mask = level_codes == -1
            if mask.any():
                formatted = np.array(formatted, dtype=object)
                formatted[mask] = na
                formatted = formatted.tolist()

        else:
            # weird all NA case
            formatted = [
                pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                for x in algos.take_1d(lev._values, level_codes)
            ]
        stringified_levels.append(formatted)

    result_levels = []
    for lev, name in zip(stringified_levels, self.names):
        level = []

        if names:
            # header cell: the level's name (escaped), or blank if unnamed
            level.append(
                pprint_thing(name, escape_chars=("\t", "\r", "\n"))
                if name is not None
                else ""
            )

        level.extend(np.array(lev, dtype=object))
        result_levels.append(level)

    if sparsify is None:
        sparsify = get_option("display.multi_sparse")

    if sparsify:
        sentinel = ""
        # GH3547
        # use value of sparsify as sentinel, unless it's an obvious
        # "Truthy" value
        if sparsify not in [True, 1]:
            sentinel = sparsify
        # little bit of a kludge job for #1217
        result_levels = _sparsify(
            result_levels, start=int(names), sentinel=sentinel
        )

    if adjoin:
        from pandas.io.formats.format import _get_adjustment

        adj = _get_adjustment()
        return adj.adjoin(space, *result_levels).split("\n")
    else:
        return result_levels
1173 # --------------------------------------------------------------------
def __len__(self) -> int:
    """Number of entries in the index (length of the code arrays)."""
    first_level_codes = self.codes[0]
    return len(first_level_codes)
def _get_names(self):
    """Return the level names wrapped in an immutable FrozenList."""
    current_names = self._names
    return FrozenList(current_names)
def _set_names(self, names, level=None, validate=True):
    """
    Set new names on index. Each name has to be a hashable type.

    Parameters
    ----------
    names : sequence of hashable
        name(s) to set
    level : int, level name, or sequence of int/level names (default None)
        If the index is a MultiIndex (hierarchical), level(s) to set (None
        for all levels). Otherwise level must be None
    validate : boolean, default True
        validate that the names match level lengths

    Raises
    ------
    TypeError if each name is not hashable.

    Notes
    -----
    sets names on levels. WARNING: mutates!

    Note that you generally want to set this *after* changing levels, so
    that it only acts on copies
    """
    # GH 15110
    # Don't allow a single string for names in a MultiIndex
    if names is not None and not is_list_like(names):
        raise ValueError("Names should be list-like for a MultiIndex")
    # NOTE(review): names=None slips past the check above and raises
    # TypeError on list(None) — presumably callers never pass None here;
    # confirm against call sites.
    names = list(names)

    if validate:
        if level is not None and len(names) != len(level):
            raise ValueError("Length of names must match length of level.")
        if level is None and len(names) != self.nlevels:
            raise ValueError(
                "Length of names must match number of levels in MultiIndex."
            )

    if level is None:
        level = range(self.nlevels)
    else:
        # normalize level names / negative ints to zero-based numbers
        level = [self._get_level_number(lev) for lev in level]

    # set the name
    for lev, name in zip(level, names):
        if name is not None:
            # GH 20527
            # All items in 'names' need to be hashable:
            if not is_hashable(name):
                raise TypeError(
                    f"{type(self).__name__}.name must be a hashable type"
                )
        self._names[lev] = name

    # If .levels has been accessed, the names in our cache will be stale.
    self._reset_cache()
# Public accessor pair for the per-level names; the setter mutates the
# index in place (see _set_names).
names = property(
    fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex.\n"""
)
@Appender(_index_shared_docs["_get_grouper_for_level"])
def _get_grouper_for_level(self, mapper, level):
    indexer = self.codes[level]
    level_index = self.levels[level]

    if mapper is not None:
        # Handle group mapping function and return
        level_values = self.levels[level].take(indexer)
        grouper = level_values.map(mapper)
        return grouper, None, None

    # factorize so observed groups are labeled 0..n-1 in sorted code order
    codes, uniques = algos.factorize(indexer, sort=True)

    if len(uniques) > 0 and uniques[0] == -1:
        # Handle NAs: refactorize the non-missing codes only, and keep
        # -1 for the missing positions
        mask = indexer != -1
        ok_codes, uniques = algos.factorize(indexer[mask], sort=True)

        codes = np.empty(len(indexer), dtype=indexer.dtype)
        codes[mask] = ok_codes
        codes[~mask] = -1

    if len(uniques) < len(level_index):
        # Remove unobserved levels from level_index
        level_index = level_index.take(uniques)
    else:
        # break references back to us so that setting the name
        # on the output of a groupby doesn't reflect back here.
        level_index = level_index.copy()

    if level_index._can_hold_na:
        grouper = level_index.take(codes, fill_value=True)
    else:
        grouper = level_index.take(codes)

    return grouper, codes, level_index
@property
def _constructor(self):
    # Constructing a MultiIndex from a flat sequence of scalars means
    # reassembling it from tuples.
    return MultiIndex.from_tuples
@cache_readonly
def inferred_type(self) -> str:
    # Entries are tuples of potentially differing types, so a MultiIndex
    # always reports "mixed".
    return "mixed"
def _get_level_number(self, level) -> int:
    """
    Translate a level name or (possibly negative) integer into a
    zero-based level number.

    Raises
    ------
    ValueError
        If ``level`` is a duplicated (ambiguous) level name.
    KeyError
        If ``level`` is a name that does not exist.
    IndexError
        If an integer ``level`` is out of range.
    """
    count = self.names.count(level)
    if (count > 1) and not is_integer(level):
        raise ValueError(
            f"The name {level} occurs multiple times, use a level number"
        )
    try:
        # try treating ``level`` as a name first
        level = self.names.index(level)
    except ValueError:
        if not is_integer(level):
            raise KeyError(f"Level {level} not found")
        elif level < 0:
            level += self.nlevels
            if level < 0:
                orig_level = level - self.nlevels
                raise IndexError(
                    f"Too many levels: Index has only {self.nlevels} levels,"
                    f" {orig_level} is not a valid level number"
                )
        # Note: levels are zero-based
        elif level >= self.nlevels:
            raise IndexError(
                f"Too many levels: Index has only {self.nlevels} levels, "
                f"not {level + 1}"
            )
    return level
# Cache for the materialized ndarray of tuples (see ``values``); filled
# lazily on first access.
_tuples = None
@cache_readonly
def _engine(self):
    """
    Build the indexing engine, packing the per-level codes into single
    unsigned integers whose numeric order matches the lexical code order.
    """
    # Calculate the number of bits needed to represent labels in each
    # level, as log2 of their sizes (including -1 for NaN):
    sizes = np.ceil(np.log2([len(l) + 1 for l in self.levels]))

    # Sum bit counts, starting from the _right_....
    lev_bits = np.cumsum(sizes[::-1])[::-1]

    # ... in order to obtain offsets such that sorting the combination of
    # shifted codes (one for each level, resulting in a unique integer) is
    # equivalent to sorting lexicographically the codes themselves. Notice
    # that each level needs to be shifted by the number of bits needed to
    # represent the _previous_ ones:
    offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")

    # Check the total number of bits needed for our representation:
    if lev_bits[0] > 64:
        # The levels would overflow a 64 bit uint - use Python integers:
        return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
    return MultiIndexUIntEngine(self.levels, self.codes, offsets)
@property
def values(self):
    """
    ndarray of tuples, one per entry; built lazily and cached on
    ``self._tuples``.
    """
    if self._tuples is not None:
        # cached result from an earlier access
        return self._tuples

    values = []

    for i in range(self.nlevels):
        vals = self._get_level_values(i)
        if is_categorical_dtype(vals):
            # unwrap categoricals to their underlying values
            vals = vals._internal_get_values()
        if isinstance(vals.dtype, ExtensionDtype) or hasattr(vals, "_box_values"):
            # extension / boxable arrays: go through object dtype so each
            # element becomes a boxed scalar
            vals = vals.astype(object)
        vals = np.array(vals, copy=False)
        values.append(vals)

    self._tuples = lib.fast_zip(values)
    return self._tuples
@property
def _has_complex_internals(self):
    # used to avoid libreduction code paths, which raise or require conversion
    # (always True for MultiIndex)
    return True
@cache_readonly
def is_monotonic_increasing(self) -> bool:
    """
    return if the index is monotonic increasing (only equal or
    increasing) values.
    """
    if all(x.is_monotonic for x in self.levels):
        # If each level is sorted, we can operate on the codes directly. GH27495
        return libalgos.is_lexsorted(
            [x.astype("int64", copy=False) for x in self.codes]
        )

    # reversed() because lexsort() wants the most significant key last.
    values = [
        self._get_level_values(i).values for i in reversed(range(len(self.levels)))
    ]
    try:
        sort_order = np.lexsort(values)
        # monotonic iff the lexsort permutation is already in order
        return Index(sort_order).is_monotonic
    except TypeError:

        # we have mixed types and np.lexsort is not happy
        return Index(self.values).is_monotonic
@cache_readonly
def is_monotonic_decreasing(self) -> bool:
    """
    Return True if the index is monotonic decreasing (only equal or
    decreasing) values.
    """
    # Equivalent to the reversed index being monotonic increasing.
    reversed_index = self[::-1]
    return reversed_index.is_monotonic_increasing
@cache_readonly
def _have_mixed_levels(self):
    """Boolean list, one entry per level, True where the level's
    inferred type contains "mixed"."""
    return ["mixed" in inferred for inferred in self._inferred_type_levels]
@cache_readonly
def _inferred_type_levels(self):
    """Return the inferred-type string of each level, in level order."""
    return [level.inferred_type for level in self.levels]
@cache_readonly
def _hashed_values(self):
    """Return a uint64 ndarray holding the hash of each entry."""
    hashed = hash_tuples(self)
    return hashed
def _hashed_indexing_key(self, key):
    """
    validate and return the hash for the provided key

    *this is internal for use for the cython routines*

    Parameters
    ----------
    key : string or tuple

    Returns
    -------
    np.uint64

    Notes
    -----
    we need to stringify if we have mixed levels
    """
    if not isinstance(key, tuple):
        return hash_tuples(key)

    if not len(key) == self.nlevels:
        raise KeyError

    def f(k, stringify):
        # coerce to str on mixed-type levels so the key hashes the same
        # way the index values themselves were hashed
        if stringify and not isinstance(k, str):
            k = str(k)
        return k

    key = tuple(
        f(k, stringify) for k, stringify in zip(key, self._have_mixed_levels)
    )
    return hash_tuple(key)
@Appender(Index.duplicated.__doc__)
def duplicated(self, keep="first"):
    # collapse each row's codes into a single int64 group id, then run
    # duplicate detection on those ids
    shape = map(len, self.levels)
    ids = get_group_index(self.codes, shape, sort=False, xnull=False)

    return duplicated_int64(ids, keep)
def fillna(self, value=None, downcast=None):
    """
    fillna is not implemented for MultiIndex.

    The parameters exist only for signature compatibility with
    ``Index.fillna``; calling this always raises.

    Raises
    ------
    NotImplementedError
        Always.
    """
    # Fix: the message previously said "isna" (copy-paste from the isna
    # stub); report the method actually called.
    raise NotImplementedError("fillna is not defined for MultiIndex")
@Appender(_index_shared_docs["dropna"])
def dropna(self, how="any"):
    # an entry is NA in a level where its code is -1
    nans = [level_codes == -1 for level_codes in self.codes]
    if how == "any":
        indexer = np.any(nans, axis=0)
    elif how == "all":
        indexer = np.all(nans, axis=0)
    else:
        raise ValueError(f"invalid how option: {how}")

    # keep only the rows not flagged by the indexer
    new_codes = [level_codes[~indexer] for level_codes in self.codes]
    return self.copy(codes=new_codes, deep=True)
def get_value(self, series, key):
    """
    Look up ``key`` in ``series`` via this index.

    Tries the hash engine first, then falls back to a label-based
    multi-index lookup, a positional lookup, and finally Timestamp
    coercion for datetime-like keys.
    """
    # Label-based
    s = com.values_from_object(series)
    k = com.values_from_object(key)

    def _try_mi(k):
        # TODO: what if a level contains tuples??
        loc = self.get_loc(k)
        new_values = series._values[loc]
        new_index = self[loc]
        new_index = maybe_droplevels(new_index, k)
        return series._constructor(
            new_values, index=new_index, name=series.name
        ).__finalize__(self)

    try:
        return self._engine.get_value(s, k)
    except KeyError as e1:
        # engine miss: try a label-based multi-index lookup
        try:
            return _try_mi(key)
        except KeyError:
            pass

        try:
            # positional fallback
            return libindex.get_value_at(s, k)
        except IndexError:
            raise
        except TypeError:
            # generator/iterator-like
            if is_iterator(key):
                raise InvalidIndexError(key)
            else:
                # re-raise the original engine KeyError
                raise e1
        except Exception:  # pragma: no cover
            raise e1
    except TypeError:

        # a Timestamp will raise a TypeError in a multi-index
        # rather than a KeyError, try it here
        # note that a string that 'looks' like a Timestamp will raise
        # a KeyError! (GH5725)
        if isinstance(key, (datetime.datetime, np.datetime64, str)):
            try:
                return _try_mi(key)
            except KeyError:
                raise
            except (IndexError, ValueError, TypeError):
                pass

            # last resort: coerce the key to a Timestamp and retry
            try:
                return _try_mi(Timestamp(key))
            except (
                KeyError,
                TypeError,
                IndexError,
                ValueError,
                tslibs.OutOfBoundsDatetime,
            ):
                pass

        raise InvalidIndexError(key)
def _get_level_values(self, level, unique=False):
    """
    Return vector of label values for requested level,
    equal to the length of the index

    **this is an internal method**

    Parameters
    ----------
    level : int level
    unique : bool, default False
        if True, drop duplicated values

    Returns
    -------
    values : ndarray
    """
    lev = self.levels[level]
    level_codes = self.codes[level]
    name = self._names[level]
    if unique:
        level_codes = algos.unique(level_codes)
    # map codes onto the level's values; -1 codes become the level's NA
    filled = algos.take_1d(lev._values, level_codes, fill_value=lev._na_value)
    return lev._shallow_copy(filled, name=name)
def get_level_values(self, level):
    """
    Return vector of label values for requested level,
    equal to the length of the index.

    Parameters
    ----------
    level : int or str
        Either the integer position of the level in the MultiIndex, or
        the name of the level.

    Returns
    -------
    values : Index
        The requested level of this MultiIndex converted to a single
        :class:`Index` (or subclass thereof).

    Examples
    --------
    Create a MultiIndex:

    >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
    >>> mi.names = ['level_1', 'level_2']

    Get level values by supplying level as either integer or name:

    >>> mi.get_level_values(0)
    Index(['a', 'b', 'c'], dtype='object', name='level_1')
    >>> mi.get_level_values('level_2')
    Index(['d', 'e', 'f'], dtype='object', name='level_2')
    """
    level_number = self._get_level_number(level)
    return self._get_level_values(level_number)
@Appender(_index_shared_docs["index_unique"] % _index_doc_kwargs)
def unique(self, level=None):
    if level is not None:
        # per-level unique values
        level_number = self._get_level_number(level)
        return self._get_level_values(level=level_number, unique=True)
    # unique tuples over the whole index
    return super().unique()
def _to_safe_for_reshape(self):
    """Convert any categorical levels to object dtype so reshaping is safe."""
    safe_levels = [level._to_safe_for_reshape() for level in self.levels]
    return self.set_levels(safe_levels)
def to_frame(self, index=True, name=None):
    """
    Create a DataFrame with the levels of the MultiIndex as columns.

    Column ordering is determined by the DataFrame constructor with data as
    a dict.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    index : bool, default True
        Set the index of the returned DataFrame as the original MultiIndex.
    name : list / sequence of strings, optional
        The passed names should substitute index level names.

    Returns
    -------
    DataFrame : a DataFrame containing the original MultiIndex data.

    See Also
    --------
    DataFrame
    """
    from pandas import DataFrame

    if name is None:
        idx_names = self.names
    else:
        if not is_list_like(name):
            raise TypeError("'name' must be a list / sequence of column names.")
        if len(name) != len(self.levels):
            raise ValueError(
                "'name' should have same length as number of levels on index."
            )
        idx_names = name

    # Guarantee resulting column order - PY36+ dict maintains insertion order.
    # Unnamed levels fall back to their level number as the column key.
    columns = {}
    for level_number, level_name in enumerate(idx_names):
        column_key = level_number if level_name is None else level_name
        columns[column_key] = self._get_level_values(level_number)

    result = DataFrame(columns, copy=False)

    if index:
        result.index = self
    return result
def to_flat_index(self):
    """
    Convert a MultiIndex to an Index of Tuples containing the level values.

    .. versionadded:: 0.24.0

    Returns
    -------
    pd.Index
        Index with the MultiIndex data represented in Tuples.

    Notes
    -----
    This method will simply return the caller if called by anything other
    than a MultiIndex.

    Examples
    --------
    >>> index = pd.MultiIndex.from_product(
    ...     [['foo', 'bar'], ['baz', 'qux']],
    ...     names=['a', 'b'])
    >>> index.to_flat_index()
    Index([('foo', 'baz'), ('foo', 'qux'),
           ('bar', 'baz'), ('bar', 'qux')],
          dtype='object')
    """
    tuple_values = self.values
    return Index(tuple_values, tupleize_cols=False)
@property
def is_all_dates(self) -> bool:
    """A MultiIndex is never considered an all-dates index."""
    return False
1695 def is_lexsorted(self) -> bool:
1696 """
1697 Return True if the codes are lexicographically sorted.
1699 Returns
1700 -------
1701 bool
1702 """
1703 return self.lexsort_depth == self.nlevels
@cache_readonly
def lexsort_depth(self):
    """Known sorted depth: the declared sortorder if set, otherwise
    computed from the codes."""
    if self.sortorder is None:
        return self._lexsort_depth()
    return self.sortorder
def _lexsort_depth(self) -> int:
    """
    Compute and return the lexsort_depth, the number of leading levels
    of the MultiIndex whose codes are lexicographically sorted.

    Returns
    -------
    int
    """
    codes_as_int64 = [ensure_int64(level_codes) for level_codes in self.codes]
    # largest prefix length whose codes are lexsorted; 0 if none is
    return next(
        (
            depth
            for depth in range(self.nlevels, 0, -1)
            if libalgos.is_lexsorted(codes_as_int64[:depth])
        ),
        0,
    )
def _sort_levels_monotonic(self):
    """
    This is an *internal* function.

    Create a new MultiIndex from the current to monotonically sorted
    items IN the levels. This does not actually make the entire MultiIndex
    monotonic, JUST the levels.

    The resulting MultiIndex will have the same outward
    appearance, meaning the same .values and ordering. It will also
    be .equals() to the original.

    Returns
    -------
    MultiIndex

    Examples
    --------
    >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
    ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
    >>> mi
    MultiIndex([('a', 'bb'),
                ('a', 'aa'),
                ('b', 'bb'),
                ('b', 'aa')],
               )

    >>> mi.sort_values()
    MultiIndex([('a', 'aa'),
                ('a', 'bb'),
                ('b', 'aa'),
                ('b', 'bb')],
               )
    """
    if self.is_lexsorted() and self.is_monotonic:
        # already in canonical form; nothing to do
        return self

    new_levels = []
    new_codes = []

    for lev, level_codes in zip(self.levels, self.codes):

        if not lev.is_monotonic:
            try:
                # indexer to reorder the levels
                indexer = lev.argsort()
            except TypeError:
                # unorderable level values: leave this level as-is
                pass
            else:
                lev = lev.take(indexer)

                # indexer to reorder the level codes
                indexer = ensure_int64(indexer)
                ri = lib.get_reverse_indexer(indexer, len(indexer))
                level_codes = algos.take_1d(ri, level_codes)

        new_levels.append(lev)
        new_codes.append(level_codes)

    return MultiIndex(
        new_levels,
        new_codes,
        names=self.names,
        sortorder=self.sortorder,
        verify_integrity=False,
    )
def remove_unused_levels(self):
    """
    Create a new MultiIndex from the current that removes
    unused levels, meaning that they are not expressed in the labels.

    The resulting MultiIndex will have the same outward
    appearance, meaning the same .values and ordering. It will also
    be .equals() to the original.

    Returns
    -------
    MultiIndex

    Examples
    --------
    >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
    >>> mi
    MultiIndex([(0, 'a'),
                (0, 'b'),
                (1, 'a'),
                (1, 'b')],
               )

    >>> mi[2:]
    MultiIndex([(1, 'a'),
                (1, 'b')],
               )

    The 0 from the first level is not represented
    and can be removed

    >>> mi2 = mi[2:].remove_unused_levels()
    >>> mi2.levels
    FrozenList([[1], ['a', 'b']])
    """
    new_levels = []
    new_codes = []

    changed = False
    for lev, level_codes in zip(self.levels, self.codes):

        # Since few levels are typically unused, bincount() is more
        # efficient than unique() - however it only accepts positive values
        # (and drops order):
        uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
        has_na = int(len(uniques) and (uniques[0] == -1))

        if len(uniques) != len(lev) + has_na:
            # We have unused levels
            changed = True

            # Recalculate uniques, now preserving order.
            # Can easily be cythonized by exploiting the already existing
            # "uniques" and stop parsing "level_codes" when all items
            # are found:
            uniques = algos.unique(level_codes)
            if has_na:
                na_idx = np.where(uniques == -1)[0]
                # Just ensure that -1 is in first position:
                uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

            # codes get mapped from uniques to 0:len(uniques)
            # -1 (if present) is mapped to last position
            code_mapping = np.zeros(len(lev) + has_na)
            # ... and reassigned value -1:
            code_mapping[uniques] = np.arange(len(uniques)) - has_na

            level_codes = code_mapping[level_codes]

            # new levels are simple
            lev = lev.take(uniques[has_na:])

        new_levels.append(lev)
        new_codes.append(level_codes)

    # share data with self when nothing changed; otherwise install the
    # compacted levels/codes on a fresh view
    result = self.view()

    if changed:
        result._reset_identity()
        result._set_levels(new_levels, validate=False)
        result._set_codes(new_codes, validate=False)

    return result
@property
def nlevels(self) -> int:
    """
    Integer number of levels in this MultiIndex.
    """
    levels_list = self._levels
    return len(levels_list)
@property
def levshape(self):
    """
    A tuple with the length of each level.
    """
    return tuple(map(len, self.levels))
def __reduce__(self):
    """Necessary for making this object picklable."""
    # pickle as a dict of components; _new_Index rebuilds the instance
    state = dict(
        levels=list(self.levels),
        codes=list(self.codes),
        sortorder=self.sortorder,
        names=list(self.names),
    )
    return ibase._new_Index, (type(self), state), None
def __setstate__(self, state):
    """Necessary for making this object picklable"""
    if isinstance(state, dict):
        levels = state.get("levels")
        codes = state.get("codes")
        sortorder = state.get("sortorder")
        names = state.get("names")

    elif isinstance(state, tuple):
        # legacy pickle layout: (ndarray state, own components)
        nd_state, own_state = state
        levels, codes, sortorder, names = own_state

    # NOTE(review): a state that is neither dict nor tuple leaves these
    # names unbound and raises NameError below — presumably only these
    # two pickle layouts ever occur; confirm.
    self._set_levels([Index(x) for x in levels], validate=False)
    self._set_codes(codes)
    # re-validate codes against the restored levels
    new_codes = self._verify_integrity()
    self._set_codes(new_codes)
    self._set_names(names)
    self.sortorder = sortorder
    self._reset_identity()
def __getitem__(self, key):
    """
    Scalar keys return the tuple at that position; array-like keys
    return a new MultiIndex selecting those positions.
    """
    if is_scalar(key):
        key = com.cast_scalar_indexer(key)

        retval = []
        for lev, level_codes in zip(self.levels, self.codes):
            if level_codes[key] == -1:
                # missing entry in this level
                retval.append(np.nan)
            else:
                retval.append(lev[level_codes[key]])

        return tuple(retval)
    else:
        if com.is_bool_indexer(key):
            # boolean masks preserve relative order, so the known
            # sortorder still holds for the result
            key = np.asarray(key, dtype=bool)
            sortorder = self.sortorder
        else:
            # cannot be sure whether the result will be sorted
            sortorder = None

            if isinstance(key, Index):
                key = np.asarray(key)

        new_codes = [level_codes[key] for level_codes in self.codes]

        return MultiIndex(
            levels=self.levels,
            codes=new_codes,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )
@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
    nv.validate_take(tuple(), kwargs)
    indices = ensure_platform_int(indices)
    # take on the per-level codes; -1 marks positions to fill with NA
    taken = self._assert_take_fillable(
        self.codes,
        indices,
        allow_fill=allow_fill,
        fill_value=fill_value,
        na_value=-1,
    )
    return MultiIndex(
        levels=self.levels, codes=taken, names=self.names, verify_integrity=False
    )
def _assert_take_fillable(
    self, values, indices, allow_fill=True, fill_value=None, na_value=None
):
    """ Internal method to handle NA filling of take """
    # NOTE(review): the ``values`` argument is never used — the body
    # always takes from ``self.codes``. Callers pass ``self.codes`` so
    # the outcome is the same; confirm before relying on ``values``.
    # only fill if we are passing a non-None fill_value
    if allow_fill and fill_value is not None:
        if (indices < -1).any():
            msg = (
                "When allow_fill=True and fill_value is not None, "
                "all indices must be >= -1"
            )
            raise ValueError(msg)
        taken = [lab.take(indices) for lab in self.codes]
        mask = indices == -1
        if mask.any():
            # overwrite the filled positions with na_value in each level
            masked = []
            for new_label in taken:
                label_values = new_label
                label_values[mask] = na_value
                masked.append(np.asarray(label_values))
            taken = masked
    else:
        taken = [lab.take(indices) for lab in self.codes]
    return taken
def append(self, other):
    """
    Append a collection of Index options together

    Parameters
    ----------
    other : Index or list/tuple of indices

    Returns
    -------
    appended : Index
        A MultiIndex when every appended index is a MultiIndex of at
        least this depth; otherwise a flat Index of tuples/objects.
    """
    if not isinstance(other, (list, tuple)):
        other = [other]

    if all(
        (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other
    ):
        # all are MultiIndexes of sufficient depth: append level by level
        arrays = []
        for i in range(self.nlevels):
            label = self._get_level_values(i)
            appended = [o._get_level_values(i) for o in other]
            arrays.append(label.append(appended))
        return MultiIndex.from_arrays(arrays, names=self.names)

    # otherwise fall back to concatenating materialized tuples
    to_concat = (self.values,) + tuple(k._values for k in other)
    new_tuples = np.concatenate(to_concat)

    # if all(isinstance(x, MultiIndex) for x in other):
    try:
        return MultiIndex.from_tuples(new_tuples, names=self.names)
    except (TypeError, IndexError):
        # not all entries are tuples; return a flat Index
        return Index(new_tuples)
def argsort(self, *args, **kwargs):
    """Return the indices that would sort the index's tuple values."""
    tuple_values = self.values
    return tuple_values.argsort(*args, **kwargs)
@Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
def repeat(self, repeats, axis=None):
    nv.validate_repeat(tuple(), dict(axis=axis))
    repeats = ensure_platform_int(repeats)
    # repeating the codes is sufficient; the levels are unchanged
    return MultiIndex(
        levels=self.levels,
        codes=[
            level_codes.view(np.ndarray).astype(np.intp).repeat(repeats)
            for level_codes in self.codes
        ],
        names=self.names,
        sortorder=self.sortorder,
        verify_integrity=False,
    )
def where(self, cond, other=None):
    """``Index.where`` is not supported for MultiIndex; always raises."""
    raise NotImplementedError(".where is not supported for MultiIndex operations")
def drop(self, codes, level=None, errors="raise"):
    """
    Make new MultiIndex with passed list of codes deleted

    Parameters
    ----------
    codes : array-like
        Must be a list of tuples
    level : int or level name, default None
    errors : str, default 'raise'
        If 'ignore', missing labels do not raise KeyError.

    Returns
    -------
    dropped : MultiIndex
    """
    if level is not None:
        # dropping labels from a single level
        return self._drop_from_level(codes, level, errors)

    if not isinstance(codes, (np.ndarray, Index)):
        try:
            codes = com.index_labels_to_array(codes, dtype=object)
        except ValueError:
            pass

    inds = []
    for level_codes in codes:
        try:
            loc = self.get_loc(level_codes)
            # get_loc returns either an integer, a slice, or a boolean
            # mask
            if isinstance(loc, int):
                inds.append(loc)
            elif isinstance(loc, slice):
                inds.extend(range(loc.start, loc.stop))
            elif com.is_bool_indexer(loc):
                if self.lexsort_depth == 0:
                    warnings.warn(
                        "dropping on a non-lexsorted multi-index "
                        "without a level parameter may impact performance.",
                        PerformanceWarning,
                        stacklevel=3,
                    )
                # convert the mask to positional indices
                loc = loc.nonzero()[0]
                inds.extend(loc)
            else:
                msg = f"unsupported indexer of type {type(loc)}"
                raise AssertionError(msg)
        except KeyError:
            if errors != "ignore":
                raise

    return self.delete(inds)
def _drop_from_level(self, codes, level, errors="raise"):
    """Drop all entries whose value at ``level`` appears in ``codes``."""
    codes = com.index_labels_to_array(codes)
    level_number = self._get_level_number(level)
    level_index = self.levels[level_number]
    drop_positions = level_index.get_indexer(codes)

    keep_mask = ~algos.isin(self.codes[level_number], drop_positions)
    if keep_mask.all() and errors != "ignore":
        # nothing matched: every entry would be kept
        raise KeyError(f"labels {codes} not found in level")

    return self[keep_mask]
def swaplevel(self, i=-2, j=-1):
    """
    Swap level i with level j.

    Calling this method does not change the ordering of the values.

    Parameters
    ----------
    i : int, str, default -2
        First level of index to be swapped. Can pass level name as string.
        Type of parameters can be mixed.
    j : int, str, default -1
        Second level of index to be swapped. Can pass level name as string.
        Type of parameters can be mixed.

    Returns
    -------
    MultiIndex
        A new MultiIndex.

    See Also
    --------
    Series.swaplevel : Swap levels i and j in a MultiIndex.
    Dataframe.swaplevel : Swap levels i and j in a MultiIndex on a
        particular axis.

    Examples
    --------
    >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
    ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
    >>> mi.swaplevel(0, 1)
    MultiIndex([('bb', 'a'),
                ('aa', 'a'),
                ('bb', 'b'),
                ('aa', 'b')],
               )
    """
    i = self._get_level_number(i)
    j = self._get_level_number(j)

    new_levels = list(self.levels)
    new_codes = list(self.codes)
    new_names = list(self.names)

    # swap the two positions in each parallel list
    for seq in (new_levels, new_codes, new_names):
        seq[i], seq[j] = seq[j], seq[i]

    return MultiIndex(
        levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
    )
def reorder_levels(self, order):
    """
    Rearrange levels using input order. May not drop or duplicate levels.

    Parameters
    ----------
    order : list of int or str
        Target level ordering, given as level numbers or level names;
        must reference every level exactly once.

    Returns
    -------
    MultiIndex
    """
    order = [self._get_level_number(i) for i in order]
    if len(order) != self.nlevels:
        raise AssertionError(
            f"Length of order must be same as number of levels ({self.nlevels}),"
            f" got {len(order)}"
        )
    reordered_levels = [self.levels[i] for i in order]
    reordered_codes = [self.codes[i] for i in order]
    reordered_names = [self.names[i] for i in order]

    return MultiIndex(
        levels=reordered_levels,
        codes=reordered_codes,
        names=reordered_names,
        verify_integrity=False,
    )
def _get_codes_for_sorting(self):
    """
    we are categorizing our codes by using the
    available categories (all, not just observed)
    excluding any missing ones (-1); this is in preparation
    for sorting, where we need to disambiguate that -1 is not
    a valid value
    """

    def cats(level_codes):
        # categories 0..max(code); empty codes yield an empty range
        return np.arange(
            np.array(level_codes).max() + 1 if len(level_codes) else 0,
            dtype=level_codes.dtype,
        )

    return [
        Categorical.from_codes(level_codes, cats(level_codes), ordered=True)
        for level_codes in self.codes
    ]
def sortlevel(self, level=0, ascending=True, sort_remaining=True):
    """
    Sort MultiIndex at the requested level. The result will respect the
    original ordering of the associated factor at that level.

    Parameters
    ----------
    level : list-like, int or str, default 0
        If a string is given, must be a name of the level.
        If list-like must be names or ints of levels.
    ascending : bool, default True
        False to sort in descending order.
        Can also be a list to specify a directed ordering.
    sort_remaining : sort by the remaining levels after level

    Returns
    -------
    sorted_index : pd.MultiIndex
        Resulting index.
    indexer : np.ndarray
        Indices of output values in original index.
    """
    if isinstance(level, (str, int)):
        level = [level]
    level = [self._get_level_number(lev) for lev in level]
    sortorder = None

    # we have a directed ordering via ascending
    if isinstance(ascending, list):
        if not len(level) == len(ascending):
            raise ValueError("level must have same length as ascending")

        indexer = lexsort_indexer(
            [self.codes[lev] for lev in level], orders=ascending
        )

    # level ordering
    else:

        codes = list(self.codes)
        shape = list(self.levshape)

        # partition codes and shape
        primary = tuple(codes[lev] for lev in level)
        primshp = tuple(shape[lev] for lev in level)

        # Reverse sorted to retain the order of
        # smaller indices that needs to be removed
        for lev in sorted(level, reverse=True):
            codes.pop(lev)
            shape.pop(lev)

        if sort_remaining:
            # NOTE(review): this doubles the primary keys before
            # appending the remaining ones; the duplication does not
            # change the resulting sort order — presumably intentional
            # or at least benign. Confirm before simplifying.
            primary += primary + tuple(codes)
            primshp += primshp + tuple(shape)
        else:
            # only the requested level is guaranteed sorted afterwards
            sortorder = level[0]

        indexer = indexer_from_factorized(primary, primshp, compress=False)

        if not ascending:
            indexer = indexer[::-1]

    indexer = ensure_platform_int(indexer)
    new_codes = [level_codes.take(indexer) for level_codes in self.codes]

    new_index = MultiIndex(
        codes=new_codes,
        levels=self.levels,
        names=self.names,
        sortorder=sortorder,
        verify_integrity=False,
    )

    return new_index, indexer
    def _convert_listlike_indexer(self, keyarr, kind=None):
        """
        Convert a list-like indexer, falling back to level-0 reindexing
        when the keys are not tuples.

        Parameters
        ----------
        keyarr : list-like
            Indexer to convert.

        Returns
        -------
        tuple (indexer, keyarr)
            indexer is an ndarray or None if cannot convert
            keyarr are tuple-safe keys
        """
        indexer, keyarr = super()._convert_listlike_indexer(keyarr, kind=kind)

        # are we indexing a specific level
        if indexer is None and len(keyarr) and not isinstance(keyarr[0], tuple):
            # non-tuple keys are interpreted as labels of the first level
            level = 0
            _, indexer = self.reindex(keyarr, level=level)

            # take all
            if indexer is None:
                indexer = np.arange(len(self))

            # every key must exist in the first level, otherwise KeyError
            check = self.levels[0].get_indexer(keyarr)
            mask = check == -1
            if mask.any():
                raise KeyError(f"{keyarr[mask]} not in index")

        return indexer, keyarr
    @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
    def get_indexer(self, target, method=None, limit=None, tolerance=None):
        method = missing.clean_reindex_fill_method(method)
        target = ensure_index(target)

        # empty indexer
        if is_list_like(target) and not len(target):
            return ensure_platform_int(np.array([]))

        if not isinstance(target, MultiIndex):
            try:
                target = MultiIndex.from_tuples(target)
            except (TypeError, ValueError):

                # let's instead try with a straight Index
                if method is None:
                    return Index(self.values).get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )

        # fill methods require unique labels to be meaningful
        if not self.is_unique:
            raise ValueError("Reindexing only valid with uniquely valued Index objects")

        if method == "pad" or method == "backfill":
            if tolerance is not None:
                raise NotImplementedError(
                    "tolerance not implemented yet for MultiIndex"
                )
            indexer = self._engine.get_indexer(target, method, limit)
        elif method == "nearest":
            raise NotImplementedError(
                "method='nearest' not implemented yet "
                "for MultiIndex; see GitHub issue 9365"
            )
        else:
            # exact matching via the engine; -1 marks labels not found
            indexer = self._engine.get_indexer(target)

        return ensure_platform_int(indexer)
2369 @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
2370 def get_indexer_non_unique(self, target):
2371 return super().get_indexer_non_unique(target)
    def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
        """
        Create index with target's values (move/add/delete values as necessary)

        Returns
        -------
        new_index : pd.MultiIndex
            Resulting index
        indexer : np.ndarray or None
            Indices of output values in original index.

        """
        # GH6552: preserve names when reindexing to non-named target
        # (i.e. neither Index nor Series).
        preserve_names = not hasattr(target, "names")

        if level is not None:
            if method is not None:
                raise TypeError("Fill method not supported if level passed")

            # GH7774: preserve dtype/tz if target is empty and not an Index.
            # target may be an iterator
            target = ibase._ensure_has_len(target)
            if len(target) == 0 and not isinstance(target, Index):
                # rebuild an empty index of the level's own type/attributes
                idx = self.levels[level]
                attrs = idx._get_attributes_dict()
                attrs.pop("freq", None)  # don't preserve freq
                target = type(idx)._simple_new(np.empty(0, dtype=idx.dtype), **attrs)
            else:
                target = ensure_index(target)
            target, indexer, _ = self._join_level(
                target, level, how="right", return_indexers=True, keep_order=False
            )
        else:
            target = ensure_index(target)
            if self.equals(target):
                indexer = None
            else:
                if self.is_unique:
                    indexer = self.get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                else:
                    raise ValueError("cannot handle a non-unique multi-index!")

        if not isinstance(target, MultiIndex):
            if indexer is None:
                # target equaled self: reuse self as the result
                target = self
            elif (indexer >= 0).all():
                target = self.take(indexer)
            else:
                # hopefully?
                target = MultiIndex.from_tuples(target)

        if (
            preserve_names
            and target.nlevels == self.nlevels
            and target.names != self.names
        ):
            target = target.copy(deep=False)
            target.names = self.names

        return target, indexer
2437 def get_slice_bound(
2438 self, label: Union[Hashable, Sequence[Hashable]], side: str, kind: str
2439 ) -> int:
2440 """
2441 For an ordered MultiIndex, compute slice bound
2442 that corresponds to given label.
2444 Returns leftmost (one-past-the-rightmost if `side=='right') position
2445 of given label.
2447 Parameters
2448 ----------
2449 label : object or tuple of objects
2450 side : {'left', 'right'}
2451 kind : {'loc', 'getitem'}
2453 Returns
2454 -------
2455 int
2456 Index of label.
2458 Notes
2459 -----
2460 This method only works if level 0 index of the MultiIndex is lexsorted.
2462 Examples
2463 --------
2464 >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')])
2466 Get the locations from the leftmost 'b' in the first level
2467 until the end of the multiindex:
2469 >>> mi.get_slice_bound('b', side="left", kind="loc")
2470 1
2472 Like above, but if you get the locations from the rightmost
2473 'b' in the first level and 'f' in the second level:
2475 >>> mi.get_slice_bound(('b','f'), side="right", kind="loc")
2476 3
2478 See Also
2479 --------
2480 MultiIndex.get_loc : Get location for a label or a tuple of labels.
2481 MultiIndex.get_locs : Get location for a label/slice/list/mask or a
2482 sequence of such.
2483 """
2485 if not isinstance(label, tuple):
2486 label = (label,)
2487 return self._partial_tup_index(label, side=side)
2489 def slice_locs(self, start=None, end=None, step=None, kind=None):
2490 """
2491 For an ordered MultiIndex, compute the slice locations for input
2492 labels.
2494 The input labels can be tuples representing partial levels, e.g. for a
2495 MultiIndex with 3 levels, you can pass a single value (corresponding to
2496 the first level), or a 1-, 2-, or 3-tuple.
2498 Parameters
2499 ----------
2500 start : label or tuple, default None
2501 If None, defaults to the beginning
2502 end : label or tuple
2503 If None, defaults to the end
2504 step : int or None
2505 Slice step
2506 kind : string, optional, defaults None
2508 Returns
2509 -------
2510 (start, end) : (int, int)
2512 Notes
2513 -----
2514 This method only works if the MultiIndex is properly lexsorted. So,
2515 if only the first 2 levels of a 3-level MultiIndex are lexsorted,
2516 you can only pass two levels to ``.slice_locs``.
2518 Examples
2519 --------
2520 >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
2521 ... names=['A', 'B'])
2523 Get the slice locations from the beginning of 'b' in the first level
2524 until the end of the multiindex:
2526 >>> mi.slice_locs(start='b')
2527 (1, 4)
2529 Like above, but stop at the end of 'b' in the first level and 'f' in
2530 the second level:
2532 >>> mi.slice_locs(start='b', end=('b', 'f'))
2533 (1, 3)
2535 See Also
2536 --------
2537 MultiIndex.get_loc : Get location for a label or a tuple of labels.
2538 MultiIndex.get_locs : Get location for a label/slice/list/mask or a
2539 sequence of such.
2540 """
2541 # This function adds nothing to its parent implementation (the magic
2542 # happens in get_slice_bound method), but it adds meaningful doc.
2543 return super().slice_locs(start, end, step, kind=kind)
    def _partial_tup_index(self, tup, side="left"):
        # Locate the slice bound for a (possibly partial) key tuple by
        # narrowing a [start, end) window one level at a time; only works
        # within the lexsorted prefix of the levels.
        if len(tup) > self.lexsort_depth:
            raise UnsortedIndexError(
                f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth"
                f" ({self.lexsort_depth})"
            )

        n = len(tup)
        start, end = 0, len(self)
        zipped = zip(tup, self.levels, self.codes)
        for k, (lab, lev, labs) in enumerate(zipped):
            # codes of this level restricted to the current window
            section = labs[start:end]

            if lab not in lev and np.ndim(lab) == 0 and not isna(lab):
                # scalar label absent from the level: only its would-be
                # insertion position matters, provided types are compatible
                if not lev.is_type_compatible(lib.infer_dtype([lab], skipna=False)):
                    raise TypeError(f"Level type mismatch: {lab}")

                # short circuit
                loc = lev.searchsorted(lab, side=side)
                if side == "right" and loc >= 0:
                    loc -= 1
                return start + section.searchsorted(loc, side=side)

            idx = self._get_loc_single_level_index(lev, lab)
            if k < n - 1:
                # narrow the window to the run of rows matching this level
                end = start + section.searchsorted(idx, side="right")
                start = start + section.searchsorted(idx, side="left")
            else:
                # last component of the key: return the requested bound
                return start + section.searchsorted(idx, side=side)
2575 def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int:
2576 """
2577 If key is NA value, location of index unify as -1.
2579 Parameters
2580 ----------
2581 level_index: Index
2582 key : label
2584 Returns
2585 -------
2586 loc : int
2587 If key is NA value, loc is -1
2588 Else, location of key in index.
2590 See Also
2591 --------
2592 Index.get_loc : The get_loc method for (single-level) index.
2593 """
2595 if is_scalar(key) and isna(key):
2596 return -1
2597 else:
2598 return level_index.get_loc(key)
    def get_loc(self, key, method=None):
        """
        Get location for a label or a tuple of labels as an integer, slice or
        boolean mask.

        Parameters
        ----------
        key : label or tuple of labels (one for each level)
        method : None

        Returns
        -------
        loc : int, slice object or boolean mask
            If the key is past the lexsort depth, the return may be a
            boolean mask array, otherwise it is always a slice or int.

        See Also
        --------
        Index.get_loc : The get_loc method for (single-level) index.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.

        Notes
        -----
        The key cannot be a slice, list of same-level labels, a boolean mask,
        or a sequence of such. If you want to use those, use
        :meth:`MultiIndex.get_locs` instead.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_loc('b')
        slice(1, 3, None)

        >>> mi.get_loc(('b', 'e'))
        1
        """
        if method is not None:
            raise NotImplementedError(
                "only the default get_loc method is "
                "currently supported for MultiIndex"
            )

        def _maybe_to_slice(loc):
            """convert integer indexer to boolean mask or slice if possible"""
            if not isinstance(loc, np.ndarray) or loc.dtype != "int64":
                return loc

            loc = lib.maybe_indices_to_slice(loc, len(self))
            if isinstance(loc, slice):
                return loc

            # non-contiguous positions: fall back to a boolean mask
            mask = np.empty(len(self), dtype="bool")
            mask.fill(False)
            mask[loc] = True
            return mask

        if not isinstance(key, (tuple, list)):
            # not including list here breaks some indexing, xref #30892
            loc = self._get_level_indexer(key, level=0)
            return _maybe_to_slice(loc)

        keylen = len(key)
        if self.nlevels < keylen:
            raise KeyError(
                f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
            )

        if keylen == self.nlevels and self.is_unique:
            # complete key on a unique index: the engine resolves it directly
            return self._engine.get_loc(key)

        # -- partial selection or non-unique index
        # break the key into 2 parts based on the lexsort_depth of the index;
        # the first part returns a continuous slice of the index; the 2nd part
        # needs linear search within the slice
        i = self.lexsort_depth
        lead_key, follow_key = key[:i], key[i:]
        start, stop = (
            self.slice_locs(lead_key, lead_key) if lead_key else (0, len(self))
        )

        if start == stop:
            raise KeyError(key)

        if not follow_key:
            return slice(start, stop)

        warnings.warn(
            "indexing past lexsort depth may impact performance.",
            PerformanceWarning,
            stacklevel=10,
        )

        loc = np.arange(start, stop, dtype="int64")

        # narrow the candidate positions level by level for the unsorted
        # remainder of the key
        for i, k in enumerate(follow_key, len(lead_key)):
            mask = self.codes[i][loc] == self._get_loc_single_level_index(
                self.levels[i], k
            )
            if not mask.all():
                loc = loc[mask]
            if not len(loc):
                raise KeyError(key)

        # collapse back to a slice when every row in the window matched
        return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)
    def get_loc_level(self, key, level=0, drop_level: bool = True):
        """
        Get both the location for the requested label(s) and the
        resulting sliced index.

        Parameters
        ----------
        key : label or sequence of labels
        level : int/level name or list thereof, optional
        drop_level : bool, default True
            If ``False``, the resulting index will not drop any level.

        Returns
        -------
        loc : A 2-tuple where the elements are:
              Element 0: int, slice object or boolean array
              Element 1: The resulting sliced multiindex/index. If the key
              contains all levels, this will be ``None``.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                              sequence of such.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
        ...                                names=['A', 'B'])

        >>> mi.get_loc_level('b')
        (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))

        >>> mi.get_loc_level('e', level='B')
        (array([False,  True, False], dtype=bool),
        Index(['b'], dtype='object', name='A'))

        >>> mi.get_loc_level(['b', 'e'])
        (1, None)
        """
        # different name to distinguish from maybe_droplevels
        def maybe_mi_droplevels(indexer, levels, drop_level: bool):
            # slice self by the indexer, then drop the consumed levels
            # (unless a level refuses to drop, in which case keep them all)
            if not drop_level:
                return self[indexer]
            # kludgearound
            orig_index = new_index = self[indexer]
            levels = [self._get_level_number(i) for i in levels]
            for i in sorted(levels, reverse=True):
                try:
                    new_index = new_index.droplevel(i)
                except ValueError:

                    # no dropping here
                    return orig_index
            return new_index

        if isinstance(level, (tuple, list)):
            if len(key) != len(level):
                raise AssertionError(
                    "Key for location must have same length as number of levels"
                )
            result = None
            for lev, k in zip(level, key):
                loc, new_index = self.get_loc_level(k, level=lev)
                if isinstance(loc, slice):
                    # convert to a mask so per-level results can be AND-ed
                    mask = np.zeros(len(self), dtype=bool)
                    mask[loc] = True
                    loc = mask

                result = loc if result is None else result & loc

            return result, maybe_mi_droplevels(result, level, drop_level)

        level = self._get_level_number(level)

        # kludge for #1796
        if isinstance(key, list):
            key = tuple(key)

        if isinstance(key, tuple) and level == 0:

            try:
                # the whole tuple may itself be a label of level 0
                if key in self.levels[0]:
                    indexer = self._get_level_indexer(key, level=level)
                    new_index = maybe_mi_droplevels(indexer, [0], drop_level)
                    return indexer, new_index
            except TypeError:
                pass

            if not any(isinstance(k, slice) for k in key):

                # partial selection
                # optionally get indexer to avoid re-calculation
                def partial_selection(key, indexer=None):
                    if indexer is None:
                        indexer = self.get_loc(key)
                    ilevels = [
                        i for i in range(len(key)) if key[i] != slice(None, None)
                    ]
                    return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level)

                if len(key) == self.nlevels and self.is_unique:
                    # Complete key in unique index -> standard get_loc
                    try:
                        return (self._engine.get_loc(key), None)
                    except KeyError as e:
                        raise KeyError(key) from e
                else:
                    return partial_selection(key)
            else:
                # key mixes labels and slices: resolve level by level
                indexer = None
                for i, k in enumerate(key):
                    if not isinstance(k, slice):
                        k = self._get_level_indexer(k, level=i)
                        if isinstance(k, slice):
                            # everything
                            if k.start == 0 and k.stop == len(self):
                                k = slice(None, None)
                        else:
                            k_index = k

                    if isinstance(k, slice):
                        if k == slice(None, None):
                            continue
                        else:
                            raise TypeError(key)

                    if indexer is None:
                        indexer = k_index
                    else:  # pragma: no cover
                        indexer &= k_index
                if indexer is None:
                    indexer = slice(None, None)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level)
        else:
            indexer = self._get_level_indexer(key, level=level)
            return indexer, maybe_mi_droplevels(indexer, [level], drop_level)
    def _get_level_indexer(self, key, level=0, indexer=None):
        # return an indexer, boolean array or a slice showing where the key is
        # in the totality of values
        # if the indexer is provided, then use this

        level_index = self.levels[level]
        level_codes = self.codes[level]

        def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
            # given the inputs and the codes/indexer, compute an indexer set
            # if we have a provided indexer, then this need not consider
            # the entire labels set

            # candidate code values for this level
            r = np.arange(start, stop, step)
            if indexer is not None and len(indexer) != len(codes):

                # we have an indexer which maps the locations in the labels
                # that we have already selected (and is not an indexer for the
                # entire set) otherwise this is wasteful so we only need to
                # examine locations that are in this set the only magic here is
                # that the result are the mappings to the set that we have
                # selected
                from pandas import Series

                mapper = Series(indexer)
                indexer = codes.take(ensure_platform_int(indexer))
                result = Series(Index(indexer).isin(r).nonzero()[0])
                m = result.map(mapper)._ndarray_values

            else:
                # full-length boolean mask over all rows
                m = np.zeros(len(codes), dtype=bool)
                m[np.in1d(codes, r, assume_unique=Index(codes).is_unique)] = True

            return m

        if isinstance(key, slice):
            # handle a slice, returning a slice if we can
            # otherwise a boolean indexer

            try:
                if key.start is not None:
                    start = level_index.get_loc(key.start)
                else:
                    start = 0
                if key.stop is not None:
                    stop = level_index.get_loc(key.stop)
                else:
                    stop = len(level_index) - 1
                step = key.step
            except KeyError:

                # we have a partial slice (like looking up a partial date
                # string)
                start = stop = level_index.slice_indexer(
                    key.start, key.stop, key.step, kind="loc"
                )
                step = start.step

            if isinstance(start, slice) or isinstance(stop, slice):
                # we have a slice for start and/or stop
                # a partial date slicer on a DatetimeIndex generates a slice
                # note that the stop ALREADY includes the stopped point (if
                # it was a string sliced)
                start = getattr(start, "start", start)
                stop = getattr(stop, "stop", stop)
                return convert_indexer(start, stop, step)

            elif level > 0 or self.lexsort_depth == 0 or step is not None:
                # need to have like semantics here to right
                # searching as when we are using a slice
                # so include the stop+1 (so we include stop)
                return convert_indexer(start, stop + 1, step)
            else:
                # sorted, so can return slice object -> view
                i = level_codes.searchsorted(start, side="left")
                j = level_codes.searchsorted(stop, side="right")
                return slice(i, j, step)

        else:

            # scalar key: translate the label to its integer code
            code = self._get_loc_single_level_index(level_index, key)

            if level > 0 or self.lexsort_depth == 0:
                # Desired level is not sorted
                locs = np.array(level_codes == code, dtype=bool, copy=False)
                if not locs.any():
                    # The label is present in self.levels[level] but unused:
                    raise KeyError(key)
                return locs

            # sorted level: the matching rows form a contiguous run
            i = level_codes.searchsorted(code, side="left")
            j = level_codes.searchsorted(code, side="right")
            if i == j:
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return slice(i, j)
    def get_locs(self, seq):
        """
        Get location for a sequence of labels.

        Parameters
        ----------
        seq : label, slice, list, mask or a sequence of such
           You should use one of the above for each level.
           If a level should not be used, set it to ``slice(None)``.

        Returns
        -------
        numpy.ndarray
            NumPy array of integers suitable for passing to iloc.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.slice_locs : Get slice location given start label(s) and
                                end label(s).

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_locs('b')  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
        array([2], dtype=int64)
        """
        from pandas.core.indexes.numeric import Int64Index

        # must be lexsorted to at least as many levels
        true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
        if true_slices and true_slices[-1] >= self.lexsort_depth:
            raise UnsortedIndexError(
                "MultiIndex slicing requires the index to be lexsorted: slicing "
                f"on levels {true_slices}, lexsort depth {self.lexsort_depth}"
            )
        # indexer
        # this is the list of all values that we want to select
        n = len(self)
        indexer = None

        def _convert_to_indexer(r):
            # return an indexer
            if isinstance(r, slice):
                # slice -> positions via a boolean mask
                m = np.zeros(n, dtype=bool)
                m[r] = True
                r = m.nonzero()[0]
            elif com.is_bool_indexer(r):
                if len(r) != n:
                    raise ValueError(
                        "cannot index with a boolean indexer "
                        "that is not the same length as the "
                        "index"
                    )
                r = r.nonzero()[0]
            return Int64Index(r)

        def _update_indexer(idxr, indexer=indexer):
            # intersect the running selection with this level's result
            if indexer is None:
                indexer = Index(np.arange(n))
            if idxr is None:
                return indexer
            return indexer & idxr

        for i, k in enumerate(seq):

            if com.is_bool_indexer(k):
                # a boolean indexer, must be the same length!
                k = np.asarray(k)
                indexer = _update_indexer(_convert_to_indexer(k), indexer=indexer)

            elif is_list_like(k):
                # a collection of labels to include from this level (these
                # are or'd)
                indexers = None
                for x in k:
                    try:
                        idxrs = _convert_to_indexer(
                            self._get_level_indexer(x, level=i, indexer=indexer)
                        )
                        indexers = idxrs if indexers is None else indexers | idxrs
                    except KeyError:

                        # ignore not founds
                        continue

                if indexers is not None:
                    indexer = _update_indexer(indexers, indexer=indexer)
                else:
                    # no matches we are done
                    return Int64Index([])._ndarray_values

            elif com.is_null_slice(k):
                # empty slice
                indexer = _update_indexer(None, indexer=indexer)

            elif isinstance(k, slice):

                # a slice, include BOTH of the labels
                indexer = _update_indexer(
                    _convert_to_indexer(
                        self._get_level_indexer(k, level=i, indexer=indexer)
                    ),
                    indexer=indexer,
                )
            else:
                # a single label
                indexer = _update_indexer(
                    _convert_to_indexer(
                        self.get_loc_level(k, level=i, drop_level=False)[0]
                    ),
                    indexer=indexer,
                )

        # empty indexer
        if indexer is None:
            return Int64Index([])._ndarray_values
        return indexer._ndarray_values
3072 def truncate(self, before=None, after=None):
3073 """
3074 Slice index between two labels / tuples, return new MultiIndex
3076 Parameters
3077 ----------
3078 before : label or tuple, can be partial. Default None
3079 None defaults to start
3080 after : label or tuple, can be partial. Default None
3081 None defaults to end
3083 Returns
3084 -------
3085 truncated : MultiIndex
3086 """
3087 if after and before and after < before:
3088 raise ValueError("after < before")
3090 i, j = self.levels[0].slice_locs(before, after)
3091 left, right = self.slice_locs(before, after)
3093 new_levels = list(self.levels)
3094 new_levels[0] = new_levels[0][i:j]
3096 new_codes = [level_codes[left:right] for level_codes in self.codes]
3097 new_codes[0] = new_codes[0] - i
3099 return MultiIndex(levels=new_levels, codes=new_codes, verify_integrity=False)
    def equals(self, other) -> bool:
        """
        Determines if two MultiIndex objects have the same labeling information
        (the levels themselves do not necessarily have to be the same)

        See Also
        --------
        equal_levels
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False

        if not isinstance(other, MultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            if not is_object_dtype(other.dtype):
                if self.nlevels != other.nlevels:
                    return False

            # compare as flat arrays of tuples
            other_vals = com.values_from_object(ensure_index(other))
            return array_equivalent(self._ndarray_values, other_vals)

        if self.nlevels != other.nlevels:
            return False

        if len(self) != len(other):
            return False

        # compare the realized values of each level, with NA entries
        # (code == -1) dropped from both sides
        for i in range(self.nlevels):
            self_codes = self.codes[i]
            self_codes = self_codes[self_codes != -1]
            self_values = algos.take_nd(
                np.asarray(self.levels[i]._values), self_codes, allow_fill=False
            )

            other_codes = other.codes[i]
            other_codes = other_codes[other_codes != -1]
            other_values = algos.take_nd(
                np.asarray(other.levels[i]._values), other_codes, allow_fill=False
            )

            # since we use NaT both datetime64 and timedelta64
            # we can have a situation where a level is typed say
            # timedelta64 in self (IOW it has other values than NaT)
            # but types datetime64 in other (where its all NaT)
            # but these are equivalent
            if len(self_values) == 0 and len(other_values) == 0:
                continue

            if not array_equivalent(self_values, other_values):
                return False

        return True
3157 def equal_levels(self, other):
3158 """
3159 Return True if the levels of both MultiIndex objects are the same
3161 """
3162 if self.nlevels != other.nlevels:
3163 return False
3165 for i in range(self.nlevels):
3166 if not self.levels[i].equals(other.levels[i]):
3167 return False
3168 return True
    def union(self, other, sort=None):
        """
        Form the union of two MultiIndex objects

        Parameters
        ----------
        other : MultiIndex or array / Index of tuples
        sort : False or None, default None
            Whether to sort the resulting Index.

            * None : Sort the result, except when

              1. `self` and `other` are equal.
              2. `self` has length 0.
              3. Some values in `self` or `other` cannot be compared.
                 A RuntimeWarning is issued in this case.

            * False : do not sort the result.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default value from ``True`` to ``None``
               (without change in behaviour).

        Returns
        -------
        Index

        Examples
        --------
        >>> index.union(index2)
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_names = self._convert_can_do_setop(other)

        # trivial unions: nothing to add
        if len(other) == 0 or self.equals(other):
            return self

        # TODO: Index.union returns other when `len(self)` is 0.

        # dedupe across both flat tuple arrays in one pass
        uniq_tuples = lib.fast_unique_multiple(
            [self._ndarray_values, other._ndarray_values], sort=sort
        )

        return MultiIndex.from_arrays(
            zip(*uniq_tuples), sortorder=0, names=result_names
        )
    def intersection(self, other, sort=False):
        """
        Form the intersection of two MultiIndex objects.

        Parameters
        ----------
        other : MultiIndex or array / Index of tuples
        sort : False or None, default False
            Sort the resulting MultiIndex if possible

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default from ``True`` to ``False``, to match
               behaviour from before 0.24.0

        Returns
        -------
        Index
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_names = self._convert_can_do_setop(other)

        if self.equals(other):
            return self

        # set intersection on the flat tuple representations
        self_tuples = self._ndarray_values
        other_tuples = other._ndarray_values
        uniq_tuples = set(self_tuples) & set(other_tuples)

        if sort is None:
            uniq_tuples = sorted(uniq_tuples)

        if len(uniq_tuples) == 0:
            # empty result: keep the original levels so dtypes survive
            return MultiIndex(
                levels=self.levels,
                codes=[[]] * self.nlevels,
                names=result_names,
                verify_integrity=False,
            )
        else:
            return MultiIndex.from_arrays(
                zip(*uniq_tuples), sortorder=0, names=result_names
            )
    def difference(self, other, sort=None):
        """
        Compute set difference of two MultiIndex objects

        Parameters
        ----------
        other : MultiIndex
        sort : False or None, default None
            Sort the resulting MultiIndex if possible

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default value from ``True`` to ``None``
               (without change in behaviour).

        Returns
        -------
        diff : MultiIndex
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_names = self._convert_can_do_setop(other)

        if len(other) == 0:
            # nothing to subtract
            return self

        if self.equals(other):
            # complete overlap: empty result, original levels preserved
            return MultiIndex(
                levels=self.levels,
                codes=[[]] * self.nlevels,
                names=result_names,
                verify_integrity=False,
            )

        this = self._get_unique_index()

        # positions in `this` that also occur in `other`
        indexer = this.get_indexer(other)
        indexer = indexer.take((indexer != -1).nonzero()[0])

        # keep everything not matched by `other`
        label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
        difference = this.values.take(label_diff)
        if sort is None:
            difference = sorted(difference)

        if len(difference) == 0:
            return MultiIndex(
                levels=[[]] * self.nlevels,
                codes=[[]] * self.nlevels,
                names=result_names,
                verify_integrity=False,
            )
        else:
            return MultiIndex.from_tuples(difference, sortorder=0, names=result_names)
3322 @Appender(_index_shared_docs["astype"])
3323 def astype(self, dtype, copy=True):
3324 dtype = pandas_dtype(dtype)
3325 if is_categorical_dtype(dtype):
3326 msg = "> 1 ndim Categorical are not supported at this time"
3327 raise NotImplementedError(msg)
3328 elif not is_object_dtype(dtype):
3329 raise TypeError(
3330 f"Setting {type(self)} dtype to anything other "
3331 "than object is not supported"
3332 )
3333 elif copy is True:
3334 return self._shallow_copy()
3335 return self
3337 def _convert_can_do_setop(self, other):
3338 result_names = self.names
3340 if not hasattr(other, "names"):
3341 if len(other) == 0:
3342 other = MultiIndex(
3343 levels=[[]] * self.nlevels,
3344 codes=[[]] * self.nlevels,
3345 verify_integrity=False,
3346 )
3347 else:
3348 msg = "other must be a MultiIndex or a list of tuples"
3349 try:
3350 other = MultiIndex.from_tuples(other)
3351 except TypeError:
3352 raise TypeError(msg)
3353 else:
3354 result_names = self.names if self.names == other.names else None
3355 return other, result_names
def insert(self, loc, item):
    """
    Make new MultiIndex inserting new item at location

    Parameters
    ----------
    loc : int
    item : tuple
        Must be same length as number of levels in the MultiIndex

    Returns
    -------
    new_index : Index
    """
    # A scalar key addresses only the first level; pad the remaining
    # levels with empty strings.
    if not isinstance(item, tuple):
        item = (item,) + ("",) * (self.nlevels - 1)
    elif len(item) != self.nlevels:
        raise ValueError("Item must have length equal to number of levels.")

    levels_out = []
    codes_out = []
    for key, lev, lev_codes in zip(item, self.levels, self.codes):
        if key in lev:
            code = lev.get_loc(key)
        else:
            # New level value: append at the end so every existing code
            # stays valid and nothing has to be recomputed.
            code = len(lev)
            lev = lev.insert(code, key)
        levels_out.append(lev)
        codes_out.append(np.insert(ensure_int64(lev_codes), loc, code))

    return MultiIndex(
        levels=levels_out, codes=codes_out, names=self.names, verify_integrity=False
    )
def delete(self, loc):
    """
    Make new index with passed location deleted

    Returns
    -------
    new_index : MultiIndex
    """
    # Levels stay as-is; only the per-level code arrays lose the entries
    # at ``loc`` (anything ``np.delete`` accepts).
    trimmed = [np.delete(lev_codes, loc) for lev_codes in self.codes]
    return MultiIndex(
        levels=self.levels,
        codes=trimmed,
        names=self.names,
        verify_integrity=False,
    )
3413 def _wrap_joined_index(self, joined, other):
3414 names = self.names if self.names == other.names else None
3415 return MultiIndex.from_tuples(joined, names=names)
@Appender(Index.isin.__doc__)
def isin(self, values, level=None):
    if level is None:
        # Materialize once so generators are consumed safely and the
        # empty case can be detected.
        values = list(values)
        if not values:
            # BUG FIX: from_tuples cannot infer the number of levels
            # from an empty sequence and raised TypeError; an empty
            # ``values`` trivially matches nothing.
            return np.zeros(len(self), dtype=np.bool_)
        values = MultiIndex.from_tuples(values, names=self.names).values
        return algos.isin(self.values, values)
    else:
        num = self._get_level_number(level)
        levs = self.get_level_values(num)

        if levs.size == 0:
            return np.zeros(len(levs), dtype=np.bool_)
        return levs.isin(values)
# NOTE(review): these helpers come from the Index base machinery, which is
# defined outside this chunk; by their naming they strip the auto-generated
# numeric, add/sub, and logical operator methods from MultiIndex — confirm
# against the base-class implementation.
MultiIndex._add_numeric_methods_disabled()
MultiIndex._add_numeric_methods_add_sub_disabled()
MultiIndex._add_logical_methods_disabled()
3436def _sparsify(label_list, start: int = 0, sentinel=""):
3437 pivoted = list(zip(*label_list))
3438 k = len(label_list)
3440 result = pivoted[: start + 1]
3441 prev = pivoted[start]
3443 for cur in pivoted[start + 1 :]:
3444 sparse_cur = []
3446 for i, (p, t) in enumerate(zip(prev, cur)):
3447 if i == k - 1:
3448 sparse_cur.append(t)
3449 result.append(sparse_cur)
3450 break
3452 if p == t:
3453 sparse_cur.append(sentinel)
3454 else:
3455 sparse_cur.extend(cur[i:])
3456 result.append(sparse_cur)
3457 break
3459 prev = cur
3461 return list(zip(*result))
3464def _get_na_rep(dtype) -> str:
3465 return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN")
def maybe_droplevels(index, key):
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    original_index = index
    # A tuple key consumes one level per element; a scalar consumes one.
    n_drop = len(key) if isinstance(key, tuple) else 1
    for _ in range(n_drop):
        try:
            index = index.droplevel(0)
        except ValueError:
            # Dropping would remove every remaining level. For a tuple
            # key, back out entirely; for a scalar, ``index`` is still
            # the unchanged input.
            return original_index if isinstance(key, tuple) else index
    return index
3499def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray:
3500 """
3501 Coerce the array_like indexer to the smallest integer dtype that can encode all
3502 of the given categories.
3504 Parameters
3505 ----------
3506 array_like : array-like
3507 categories : array-like
3508 copy : bool
3510 Returns
3511 -------
3512 np.ndarray
3513 Non-writeable.
3514 """
3515 array_like = coerce_indexer_dtype(array_like, categories)
3516 if copy:
3517 array_like = array_like.copy()
3518 array_like.flags.writeable = False
3519 return array_like