Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from datetime import datetime 

2import operator 

3from textwrap import dedent 

4from typing import Dict, FrozenSet, Hashable, Optional, Union 

5import warnings 

6 

7import numpy as np 

8 

9from pandas._libs import algos as libalgos, index as libindex, lib 

10import pandas._libs.join as libjoin 

11from pandas._libs.lib import is_datetime_array 

12from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp 

13from pandas._libs.tslibs.period import IncompatibleFrequency 

14from pandas._libs.tslibs.timezones import tz_compare 

15from pandas.compat import set_function_name 

16from pandas.compat.numpy import function as nv 

17from pandas.util._decorators import Appender, Substitution, cache_readonly 

18 

19from pandas.core.dtypes import concat as _concat 

20from pandas.core.dtypes.cast import maybe_cast_to_integer_array 

21from pandas.core.dtypes.common import ( 

22 ensure_categorical, 

23 ensure_int64, 

24 ensure_object, 

25 ensure_platform_int, 

26 is_bool, 

27 is_bool_dtype, 

28 is_categorical, 

29 is_categorical_dtype, 

30 is_datetime64_any_dtype, 

31 is_datetime64tz_dtype, 

32 is_dtype_equal, 

33 is_extension_array_dtype, 

34 is_float, 

35 is_float_dtype, 

36 is_hashable, 

37 is_integer, 

38 is_integer_dtype, 

39 is_interval_dtype, 

40 is_iterator, 

41 is_list_like, 

42 is_object_dtype, 

43 is_period_dtype, 

44 is_scalar, 

45 is_signed_integer_dtype, 

46 is_timedelta64_dtype, 

47 is_unsigned_integer_dtype, 

48) 

49from pandas.core.dtypes.concat import concat_compat 

50from pandas.core.dtypes.generic import ( 

51 ABCCategorical, 

52 ABCDataFrame, 

53 ABCDatetimeArray, 

54 ABCDatetimeIndex, 

55 ABCIndexClass, 

56 ABCIntervalIndex, 

57 ABCMultiIndex, 

58 ABCPandasArray, 

59 ABCPeriodIndex, 

60 ABCSeries, 

61 ABCTimedeltaIndex, 

62) 

63from pandas.core.dtypes.missing import array_equivalent, isna 

64 

65from pandas.core import ops 

66from pandas.core.accessor import CachedAccessor 

67import pandas.core.algorithms as algos 

68from pandas.core.arrays import ExtensionArray 

69from pandas.core.base import IndexOpsMixin, PandasObject 

70import pandas.core.common as com 

71from pandas.core.construction import extract_array 

72from pandas.core.indexers import deprecate_ndim_indexing, maybe_convert_indices 

73from pandas.core.indexes.frozen import FrozenList 

74import pandas.core.missing as missing 

75from pandas.core.ops import get_op_result_name 

76from pandas.core.ops.invalid import make_invalid_op 

77from pandas.core.strings import StringMethods 

78 

79from pandas.io.formats.printing import ( 

80 default_pprint, 

81 format_object_attrs, 

82 format_object_summary, 

83 pprint_thing, 

84) 

85 

86__all__ = ["Index"] 

87 

88_unsortable_types = frozenset(("mixed", "mixed-integer")) 

89 

90_index_doc_kwargs = dict( 

91 klass="Index", 

92 inplace="", 

93 target_klass="Index", 

94 raises_section="", 

95 unique="Index", 

96 duplicated="np.ndarray", 

97) 

98_index_shared_docs = dict() 

99 

100 

def _make_comparison_op(op, cls):
    """
    Build a rich-comparison method (``__eq__`` etc.) for *cls* that wraps *op*.
    """

    def cmp_method(self, other):
        # Element-wise comparison requires equal lengths for array-likes.
        if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)):
            if other.ndim > 0 and len(self) != len(other):
                raise ValueError("Lengths must match to compare")

        self_is_object = is_object_dtype(self)

        if self_is_object and isinstance(other, ABCCategorical):
            # Recast ourselves as a Categorical so categories/ordering
            # are honored by the comparison.
            recast = type(other)(self._values, dtype=other.dtype)
            return op(recast, other)

        with np.errstate(all="ignore"):
            if self_is_object and isinstance(other, ExtensionArray):
                # e.g. PeriodArray
                result = op(self.values, other)
            elif self_is_object and not isinstance(self, ABCMultiIndex):
                # don't pass MultiIndex
                result = ops.comp_method_OBJECT_ARRAY(op, self.values, other)
            else:
                result = op(self.values, np.asarray(other))

        # A non-boolean result means the comparison was not meaningful.
        if is_bool_dtype(result):
            return result
        return ops.invalid_comparison(self, other, op)

    return set_function_name(cmp_method, f"__{op.__name__}__", cls)

130 

131 

def _make_arithmetic_op(op, cls):
    """
    Build an arithmetic dunder (``__add__`` etc.) for *cls* that delegates
    the actual computation to Series and re-wraps the result as Index.
    """

    def index_arithmetic_method(self, other):
        # Defer to the other operand's implementation for these types.
        if isinstance(other, (ABCSeries, ABCDataFrame, ABCTimedeltaIndex)):
            return NotImplemented

        from pandas import Series

        res = op(Series(self), other)
        if isinstance(res, tuple):
            # e.g. divmod returns a pair of results.
            quotient, remainder = res
            return Index(quotient), Index(remainder)
        return Index(res)

    # TODO: docstring?
    return set_function_name(index_arithmetic_method, f"__{op.__name__}__", cls)

147 

148 

class InvalidIndexError(Exception):
    """Raised when an attempt is made to use an invalid index key or type."""

    pass

151 

152 

# Cached object dtype, used for fast dtype-equality checks in Index.__new__.
_o_dtype = np.dtype(object)
# Sentinel factory for Index._id: each `_Identity()` call yields a distinct
# object, which `Index.is_` compares by identity to detect shared data.
_Identity = object

155 

156 

def _new_Index(cls, d):
    """
    Reconstruct an Index during unpickling.

    The default unpickle path calls ``__new__`` with no arguments, which
    Index does not support, so pickles route through this helper instead.
    """
    if issubclass(cls, ABCPeriodIndex):
        # PeriodIndex can't be instantiated from ordinals through __new__
        # (GH #13277); it has a dedicated reconstruction helper.
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex) and "labels" in d and "codes" not in d:
        # GH#23752: the "labels" kwarg was renamed to "codes".
        d["codes"] = d.pop("labels")

    return cls.__new__(cls, **d)

175 

176 

177class Index(IndexOpsMixin, PandasObject): 

178 """ 

179 Immutable ndarray implementing an ordered, sliceable set. The basic object 

180 storing axis labels for all pandas objects. 

181 

182 Parameters 

183 ---------- 

184 data : array-like (1-dimensional) 

185 dtype : NumPy dtype (default: object) 

186 If dtype is None, we find the dtype that best fits the data. 

187 If an actual dtype is provided, we coerce to that dtype if it's safe. 

188 Otherwise, an error will be raised. 

189 copy : bool 

190 Make a copy of input ndarray. 

191 name : object 

192 Name to be stored in the index. 

193 tupleize_cols : bool (default: True) 

194 When True, attempt to create a MultiIndex if possible. 

195 

196 See Also 

197 -------- 

198 RangeIndex : Index implementing a monotonic integer range. 

199 CategoricalIndex : Index of :class:`Categorical` s. 

200 MultiIndex : A multi-level, or hierarchical, Index. 

201 IntervalIndex : An Index of :class:`Interval` s. 

202 DatetimeIndex, TimedeltaIndex, PeriodIndex 

203 Int64Index, UInt64Index, Float64Index 

204 

205 Notes 

206 ----- 

207 An Index instance can **only** contain hashable objects 

208 

209 Examples 

210 -------- 

211 >>> pd.Index([1, 2, 3]) 

212 Int64Index([1, 2, 3], dtype='int64') 

213 

214 >>> pd.Index(list('abc')) 

215 Index(['a', 'b', 'c'], dtype='object') 

216 """ 

217 

    # tolist is not actually deprecated, just suppressed in the __dir__
    _deprecations: FrozenSet[str] = (
        PandasObject._deprecations
        | IndexOpsMixin._deprecations
        | frozenset(["contains", "set_value"])
    )

    # To hand over control to subclasses
    _join_precedence = 1

    # Cython methods; see github.com/cython/cython/issues/2647
    # for why we need to wrap these instead of making them class attributes
    # Moreover, cython will choose the appropriate-dtyped sub-function
    # given the dtypes of the passed arguments
    def _left_indexer_unique(self, left, right):
        # Left-join indexer assuming the values in `left` are unique.
        return libjoin.left_join_indexer_unique(left, right)

    def _left_indexer(self, left, right):
        # Left join: keep all of `left`, matching positions from `right`.
        return libjoin.left_join_indexer(left, right)

    def _inner_indexer(self, left, right):
        # Inner join: positions present in both `left` and `right`.
        return libjoin.inner_join_indexer(left, right)

    def _outer_indexer(self, left, right):
        # Outer join: union of positions from `left` and `right`.
        return libjoin.outer_join_indexer(left, right)

    _typ = "index"
    # Underlying data buffer backing this index.
    _data: Union[ExtensionArray, np.ndarray]
    # Identity token set by _reset_identity(); compared by `is_`.
    _id = None
    _name: Optional[Hashable] = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    _comparables = ["name"]
    _attributes = ["name"]
    _is_numeric_dtype = False
    _can_hold_na = True

    # would we like our indexing holder to defer to us
    _defer_to_indexing = False

    # prioritize current class for _shallow_copy_with_infer,
    # used to infer integers as datetime-likes
    _infer_as_myclass = False

    _engine_type = libindex.ObjectEngine
    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    # accessor namespaces exposed on instances (e.g. Index.str)
    _accessors = {"str"}

    str = CachedAccessor("str", StringMethods)

272 

273 # -------------------------------------------------------------------- 

274 # Constructors 

275 

276 def __new__( 

277 cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs, 

278 ) -> "Index": 

279 

280 from pandas.core.indexes.range import RangeIndex 

281 

282 name = maybe_extract_name(name, data, cls) 

283 

284 if isinstance(data, ABCPandasArray): 

285 # ensure users don't accidentally put a PandasArray in an index. 

286 data = data.to_numpy() 

287 

288 # range 

289 if isinstance(data, RangeIndex): 

290 return RangeIndex(start=data, copy=copy, dtype=dtype, name=name) 

291 elif isinstance(data, range): 

292 return RangeIndex.from_range(data, dtype=dtype, name=name) 

293 

294 # categorical 

295 elif is_categorical_dtype(data) or is_categorical_dtype(dtype): 

296 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 

297 from pandas.core.indexes.category import CategoricalIndex 

298 

299 return CategoricalIndex(data, dtype=dtype, copy=copy, name=name, **kwargs) 

300 

301 # interval 

302 elif is_interval_dtype(data) or is_interval_dtype(dtype): 

303 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 

304 from pandas.core.indexes.interval import IntervalIndex 

305 

306 closed = kwargs.pop("closed", None) 

307 if is_dtype_equal(_o_dtype, dtype): 

308 return IntervalIndex( 

309 data, name=name, copy=copy, closed=closed, **kwargs 

310 ).astype(object) 

311 return IntervalIndex( 

312 data, dtype=dtype, name=name, copy=copy, closed=closed, **kwargs 

313 ) 

314 

315 elif ( 

316 is_datetime64_any_dtype(data) 

317 or is_datetime64_any_dtype(dtype) 

318 or "tz" in kwargs 

319 ): 

320 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 

321 from pandas import DatetimeIndex 

322 

323 if is_dtype_equal(_o_dtype, dtype): 

324 # GH#23524 passing `dtype=object` to DatetimeIndex is invalid, 

325 # will raise in the where `data` is already tz-aware. So 

326 # we leave it out of this step and cast to object-dtype after 

327 # the DatetimeIndex construction. 

328 # Note we can pass copy=False because the .astype below 

329 # will always make a copy 

330 return DatetimeIndex(data, copy=False, name=name, **kwargs).astype( 

331 object 

332 ) 

333 else: 

334 return DatetimeIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) 

335 

336 elif is_timedelta64_dtype(data) or is_timedelta64_dtype(dtype): 

337 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 

338 from pandas import TimedeltaIndex 

339 

340 if is_dtype_equal(_o_dtype, dtype): 

341 # Note we can pass copy=False because the .astype below 

342 # will always make a copy 

343 return TimedeltaIndex(data, copy=False, name=name, **kwargs).astype( 

344 object 

345 ) 

346 else: 

347 return TimedeltaIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) 

348 

349 elif is_period_dtype(data) or is_period_dtype(dtype): 

350 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 

351 from pandas import PeriodIndex 

352 

353 if is_dtype_equal(_o_dtype, dtype): 

354 return PeriodIndex(data, copy=False, name=name, **kwargs).astype(object) 

355 return PeriodIndex(data, dtype=dtype, copy=copy, name=name, **kwargs) 

356 

357 # extension dtype 

358 elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): 

359 if not (dtype is None or is_object_dtype(dtype)): 

360 # coerce to the provided dtype 

361 ea_cls = dtype.construct_array_type() 

362 data = ea_cls._from_sequence(data, dtype=dtype, copy=False) 

363 else: 

364 data = np.asarray(data, dtype=object) 

365 

366 # coerce to the object dtype 

367 data = data.astype(object) 

368 return Index(data, dtype=object, copy=copy, name=name, **kwargs) 

369 

370 # index-like 

371 elif isinstance(data, (np.ndarray, Index, ABCSeries)): 

372 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 

373 from pandas.core.indexes.numeric import ( 

374 Float64Index, 

375 Int64Index, 

376 UInt64Index, 

377 ) 

378 

379 if dtype is not None: 

380 # we need to avoid having numpy coerce 

381 # things that look like ints/floats to ints unless 

382 # they are actually ints, e.g. '0' and 0.0 

383 # should not be coerced 

384 # GH 11836 

385 data = _maybe_cast_with_dtype(data, dtype, copy) 

386 dtype = data.dtype # TODO: maybe not for object? 

387 

388 # maybe coerce to a sub-class 

389 if is_signed_integer_dtype(data.dtype): 

390 return Int64Index(data, copy=copy, dtype=dtype, name=name) 

391 elif is_unsigned_integer_dtype(data.dtype): 

392 return UInt64Index(data, copy=copy, dtype=dtype, name=name) 

393 elif is_float_dtype(data.dtype): 

394 return Float64Index(data, copy=copy, dtype=dtype, name=name) 

395 elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data): 

396 subarr = data.astype("object") 

397 else: 

398 subarr = com.asarray_tuplesafe(data, dtype=object) 

399 

400 # asarray_tuplesafe does not always copy underlying data, 

401 # so need to make sure that this happens 

402 if copy: 

403 subarr = subarr.copy() 

404 

405 if dtype is None: 

406 new_data, new_dtype = _maybe_cast_data_without_dtype(subarr) 

407 if new_dtype is not None: 

408 return cls( 

409 new_data, dtype=new_dtype, copy=False, name=name, **kwargs 

410 ) 

411 

412 if kwargs: 

413 raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") 

414 if subarr.ndim > 1: 

415 # GH#13601, GH#20285, GH#27125 

416 raise ValueError("Index data must be 1-dimensional") 

417 return cls._simple_new(subarr, name, **kwargs) 

418 

419 elif hasattr(data, "__array__"): 

420 return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) 

421 elif data is None or is_scalar(data): 

422 raise cls._scalar_data_error(data) 

423 else: 

424 if tupleize_cols and is_list_like(data): 

425 # GH21470: convert iterable to list before determining if empty 

426 if is_iterator(data): 

427 data = list(data) 

428 

429 if data and all(isinstance(e, tuple) for e in data): 

430 # we must be all tuples, otherwise don't construct 

431 # 10697 

432 from pandas.core.indexes.multi import MultiIndex 

433 

434 return MultiIndex.from_tuples( 

435 data, names=name or kwargs.get("names") 

436 ) 

437 # other iterable of some kind 

438 subarr = com.asarray_tuplesafe(data, dtype=object) 

439 return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) 

440 

441 """ 

442 NOTE for new Index creation: 

443 

444 - _simple_new: It returns new Index with the same type as the caller. 

445 All metadata (such as name) must be provided by caller's responsibility. 

446 Using _shallow_copy is recommended because it fills these metadata 

447 otherwise specified. 

448 

449 - _shallow_copy: It returns new Index with the same type (using 

450 _simple_new), but fills caller's metadata otherwise specified. Passed 

451 kwargs will overwrite corresponding metadata. 

452 

453 - _shallow_copy_with_infer: It returns new Index inferring its type 

454 from passed values. It fills caller's metadata otherwise specified as the 

455 same as _shallow_copy. 

456 

457 See each method's docstring. 

458 """ 

459 

    @property
    def asi8(self):
        """
        Integer representation of the values.

        Returns
        -------
        ndarray
            An ndarray with int64 dtype.
        """
        # The base Index has no integer representation; datetime-like
        # subclasses override this.
        return None

    @classmethod
    def _simple_new(cls, values, name=None, dtype=None):
        """
        We require that we have a dtype compat for the values. If we are passed
        a non-dtype compat, then coerce using the constructor.

        Must be careful not to recurse.
        """
        # NOTE: `dtype` is accepted for signature compatibility with
        # subclasses but is unused here.
        if isinstance(values, (ABCSeries, ABCIndexClass)):
            # Index._data must always be an ndarray.
            # This is no-copy for when _values is an ndarray,
            # which should be always at this point.
            values = np.asarray(values._values)

        result = object.__new__(cls)
        result._data = values
        # _index_data is a (temporary?) fix to ensure that the direct data
        # manipulation we do in `_libs/reduction.pyx` continues to work.
        # We need access to the actual ndarray, since we're messing with
        # data buffers and strides. We don't re-use `_ndarray_values`, since
        # we actually set this value too.
        result._index_data = values
        result._name = name

        # Give the new index a fresh identity token.
        return result._reset_identity()

    @cache_readonly
    def _constructor(self):
        # Callable used to build a new instance of this (sub)class.
        return type(self)

501 

502 # -------------------------------------------------------------------- 

503 # Index Internals Methods 

504 

505 def _get_attributes_dict(self): 

506 """ 

507 Return an attributes dict for my class. 

508 """ 

509 return {k: getattr(self, k, None) for k in self._attributes} 

510 

    _index_shared_docs[
        "_shallow_copy"
    ] = """
        Create a new Index with the same class as the caller, don't copy the
        data, use the same object attributes with passed in attributes taking
        precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        kwargs : updates the default attributes for this Index
        """

    @Appender(_index_shared_docs["_shallow_copy"])
    def _shallow_copy(self, values=None, **kwargs):
        if values is None:
            values = self.values
        attributes = self._get_attributes_dict()
        attributes.update(kwargs)
        if not len(values) and "dtype" not in kwargs:
            # Empty values carry no dtype information; keep the caller's.
            attributes["dtype"] = self.dtype

        # _simple_new expects values matching the type of self._data
        values = getattr(values, "_values", values)
        if isinstance(values, ABCDatetimeArray):
            # `self.values` returns `self` for tz-aware, so we need to unwrap
            # more specifically
            values = values.asi8

        return self._simple_new(values, **attributes)

    def _shallow_copy_with_infer(self, values, **kwargs):
        """
        Create a new Index inferring the class with passed value, don't copy
        the data, use the same object attributes with passed in attributes
        taking precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        kwargs : updates the default attributes for this Index
        """
        attributes = self._get_attributes_dict()
        attributes.update(kwargs)
        attributes["copy"] = False
        if not len(values) and "dtype" not in kwargs:
            attributes["dtype"] = self.dtype
        if self._infer_as_myclass:
            # Prefer keeping the current (sub)class; fall back to a plain
            # Index when the values are not compatible with it.
            try:
                return self._constructor(values, **attributes)
            except (TypeError, ValueError):
                pass
        return Index(values, **attributes)

568 

    def _update_inplace(self, result, **kwargs):
        """
        Disabled for Index: indexes are immutable.
        """
        # guard when called from IndexOpsMixin
        raise TypeError("Index can't be updated inplace")

572 

573 def is_(self, other) -> bool: 

574 """ 

575 More flexible, faster check like ``is`` but that works through views. 

576 

577 Note: this is *not* the same as ``Index.identical()``, which checks 

578 that metadata is also the same. 

579 

580 Parameters 

581 ---------- 

582 other : object 

583 other object to compare against. 

584 

585 Returns 

586 ------- 

587 True if both have same underlying data, False otherwise : bool 

588 """ 

589 # use something other than None to be clearer 

590 return self._id is getattr(other, "_id", Ellipsis) and self._id is not None 

591 

    def _reset_identity(self):
        """
        Initializes or resets ``_id`` attribute with new object.
        """
        # Each `_Identity()` call yields a distinct object; `is_` compares
        # these tokens by identity.
        self._id = _Identity()
        return self

    def _cleanup(self):
        # Drop the cached hash-table mapping held by the engine.
        self._engine.clear_mapping()

    @cache_readonly
    def _engine(self):
        """
        The lookup engine backing indexing operations (cached per instance).
        """
        # property, for now, slow to look up

        # to avoid a reference cycle, bind `_ndarray_values` to a local variable, so
        # `self` is not passed into the lambda.
        _ndarray_values = self._ndarray_values
        return self._engine_type(lambda: _ndarray_values, len(self))

610 

611 # -------------------------------------------------------------------- 

612 # Array-Like Methods 

613 

    # ndarray compat
    def __len__(self) -> int:
        """
        Return the length of the Index.
        """
        return len(self._data)

    def __array__(self, dtype=None) -> np.ndarray:
        """
        The array interface, return my values.
        """
        return np.asarray(self._data, dtype=dtype)

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
            # Booleans, scalars and >1-dim results are returned as-is
            # rather than re-wrapped in an Index.
            return result

        attrs = self._get_attributes_dict()
        return Index(result, **attrs)

    @cache_readonly
    def dtype(self):
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype

644 

    def ravel(self, order="C"):
        """
        Return an ndarray of the flattened values of the underlying data.

        Returns
        -------
        numpy.ndarray
            Flattened array.

        See Also
        --------
        numpy.ndarray.ravel
        """
        return self._ndarray_values.ravel(order=order)

    def view(self, cls=None):
        """
        Return a view on self; with a non-Index ``cls``, view the raw
        ndarray under that type instead.
        """
        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            result = self._data.view(cls)
        else:
            result = self._shallow_copy()
        if isinstance(result, Index):
            # A view shares data with self, so it shares identity too.
            result._id = self._id
        return result

671 

672 _index_shared_docs[ 

673 "astype" 

674 ] = """ 

675 Create an Index with values cast to dtypes. The class of a new Index 

676 is determined by dtype. When conversion is impossible, a ValueError 

677 exception is raised. 

678 

679 Parameters 

680 ---------- 

681 dtype : numpy dtype or pandas type 

682 Note that any signed integer `dtype` is treated as ``'int64'``, 

683 and any unsigned integer `dtype` is treated as ``'uint64'``, 

684 regardless of the size. 

685 copy : bool, default True 

686 By default, astype always returns a newly allocated object. 

687 If copy is set to False and internal requirements on dtype are 

688 satisfied, the original data is used to create a new Index 

689 or the original Index is returned. 

690 

691 Returns 

692 ------- 

693 Index 

694 Index with values cast to specified dtype. 

695 """ 

696 

697 @Appender(_index_shared_docs["astype"]) 

698 def astype(self, dtype, copy=True): 

699 if is_dtype_equal(self.dtype, dtype): 

700 return self.copy() if copy else self 

701 

702 elif is_categorical_dtype(dtype): 

703 from pandas.core.indexes.category import CategoricalIndex 

704 

705 return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) 

706 

707 elif is_extension_array_dtype(dtype): 

708 return Index(np.asarray(self), dtype=dtype, copy=copy) 

709 

710 try: 

711 casted = self.values.astype(dtype, copy=copy) 

712 except (TypeError, ValueError): 

713 raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}") 

714 return Index(casted, name=self.name, dtype=dtype) 

715 

    _index_shared_docs[
        "take"
    ] = """
        Return a new %(klass)s of the values selected by the indices.

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        indices : list
            Indices to be taken.
        axis : int, optional
            The axis over which to select values, always 0.
        allow_fill : bool, default True
        fill_value : bool, default None
            If allow_fill=True and fill_value is not None, indices specified by
            -1 is regarded as NA. If Index doesn't hold NA, raise ValueError.

        Returns
        -------
        numpy.ndarray
            Elements of given indices.

        See Also
        --------
        numpy.ndarray.take
        """

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
        if kwargs:
            nv.validate_take(tuple(), kwargs)
        indices = ensure_platform_int(indices)
        if self._can_hold_na:
            # NA-aware path: -1 indices may be filled with the NA value.
            taken = self._assert_take_fillable(
                self.values,
                indices,
                allow_fill=allow_fill,
                fill_value=fill_value,
                na_value=self._na_value,
            )
        else:
            if allow_fill and fill_value is not None:
                cls_name = type(self).__name__
                raise ValueError(
                    f"Unable to fill values because {cls_name} cannot contain NA"
                )
            taken = self.values.take(indices)
        return self._shallow_copy(taken)

    def _assert_take_fillable(
        self, values, indices, allow_fill=True, fill_value=None, na_value=np.nan
    ):
        """
        Internal method to handle NA filling of take.
        """
        indices = ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        if allow_fill and fill_value is not None:
            if (indices < -1).any():
                # Only -1 is a valid NA marker when filling.
                raise ValueError(
                    "When allow_fill=True and fill_value is not None, "
                    "all indices must be >= -1"
                )
            taken = algos.take(
                values, indices, allow_fill=allow_fill, fill_value=na_value
            )
        else:
            taken = values.take(indices)
        return taken

787 

    _index_shared_docs[
        "repeat"
    ] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        repeated_index : %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx
        Index(['a', 'b', 'c'], dtype='object')
        >>> idx.repeat(2)
        Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
        >>> idx.repeat([1, 2, 3])
        Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
        """

    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats, axis=None):
        repeats = ensure_platform_int(repeats)
        # `axis` must be None; validate_repeat raises otherwise.
        nv.validate_repeat(tuple(), dict(axis=axis))
        return self._shallow_copy(self._values.repeat(repeats))

832 

833 # -------------------------------------------------------------------- 

834 # Copying Methods 

835 

    _index_shared_docs[
        "copy"
    ] = """
        Make a copy of this object. Name and dtype sets those attributes on
        the new object.

        Parameters
        ----------
        name : str, optional
        deep : bool, default False
        dtype : numpy dtype or pandas type

        Returns
        -------
        copy : Index

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
        """

    @Appender(_index_shared_docs["copy"])
    def copy(self, name=None, deep=False, dtype=None, **kwargs):
        if deep:
            # Deep copy duplicates the underlying data buffer as well.
            new_index = self._shallow_copy(self._data.copy())
        else:
            new_index = self._shallow_copy()

        names = kwargs.get("names")
        names = self._validate_names(name=name, names=names, deep=deep)
        new_index = new_index.set_names(names)

        if dtype:
            new_index = new_index.astype(dtype)
        return new_index

    def __copy__(self, **kwargs):
        # copy.copy() support; delegates to Index.copy.
        return self.copy(**kwargs)

    def __deepcopy__(self, memo=None):
        """
        Parameters
        ----------
        memo, default None
            Standard signature. Unused
        """
        return self.copy(deep=True)

884 

885 # -------------------------------------------------------------------- 

886 # Rendering Methods 

887 

888 def __repr__(self): 

889 """ 

890 Return a string representation for this object. 

891 """ 

892 klass_name = type(self).__name__ 

893 data = self._format_data() 

894 attrs = self._format_attrs() 

895 space = self._format_space() 

896 attrs_str = [f"{k}={v}" for k, v in attrs] 

897 prepr = f",{space}".join(attrs_str) 

898 

899 # no data provided, just attributes 

900 if data is None: 

901 data = "" 

902 

903 res = f"{klass_name}({data}{prepr})" 

904 

905 return res 

906 

    def _format_space(self):
        """
        Return the separator placed between attributes in ``__repr__``.
        """
        # using space here controls if the attributes
        # are line separated or not (the default)

        # max_seq_items = get_option('display.max_seq_items')
        # if len(self) > max_seq_items:
        #     space = "\n%s" % (' ' * (len(klass) + 1))
        return " "

    @property
    def _formatter_func(self):
        """
        Return the formatter function.
        """
        return default_pprint

    def _format_data(self, name=None):
        """
        Return the formatted data as a unicode string.
        """

        # do we want to justify (only do so for non-objects)
        is_justify = not (
            self.inferred_type in ("string", "unicode")
            or (
                self.inferred_type == "categorical" and is_object_dtype(self.categories)
            )
        )

        return format_object_summary(
            self, self._formatter_func, is_justify=is_justify, name=name
        )

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).
        """
        return format_object_attrs(self)

    def _mpl_repr(self):
        # how to represent ourselves to matplotlib
        return self.values

950 

    def format(self, name=False, formatter=None, **kwargs):
        """
        Render a string representation of the Index.
        """
        header = []
        if name:
            # Render the index's name as a header row (empty when unnamed).
            header.append(
                pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header, **kwargs)

    def _format_with_header(self, header, na_rep="NaN", **kwargs):
        """
        Format the index values as strings, prefixed by `header`, with NA
        values rendered as `na_rep`.
        """
        values = self.values

        from pandas.io.formats.format import format_array

        if is_categorical_dtype(values.dtype):
            values = np.array(values)

        elif is_object_dtype(values.dtype):
            # safe=1: only convert object values where no information is lost.
            values = lib.maybe_convert_objects(values, safe=1)

        if is_object_dtype(values.dtype):
            result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]

            # could have nans
            mask = isna(values)
            if mask.any():
                result = np.array(result)
                result[mask] = na_rep
                result = result.tolist()

        else:
            result = _trim_front(format_array(values, None, justify="left"))
        return header + result

992 

def to_native_types(self, slicer=None, **kwargs):
    """
    Format specified values of `self` and return them.

    Parameters
    ----------
    slicer : int, array-like
        An indexer into `self` that specifies which values
        are used in the formatting process.
    kwargs : dict
        Options for specifying how the values should be formatted.
        These options include the following:

        1) na_rep : str
            The value that serves as a placeholder for NULL values
        2) quoting : bool or None
            Whether or not there are quoted values in `self`
        3) date_format : str
            The format used to represent date-like values.

    Returns
    -------
    numpy.ndarray
        Formatted values.
    """
    # Restrict to the requested subset (if any), then delegate.
    subset = self if slicer is None else self[slicer]
    return subset._format_native_types(**kwargs)

1023 

def _format_native_types(self, na_rep="", quoting=None, **kwargs):
    """
    Actually format specific types of the index.

    Parameters
    ----------
    na_rep : str, default ""
        Replacement written into positions that are NA.
    quoting : bool or None
        When falsy (and the index is not object dtype) values are cast
        to a fixed-width numpy string dtype; otherwise they stay objects.
    """
    mask = isna(self)
    if not self.is_object() and not quoting:
        # NOTE(review): the fixed-width str dtype chosen here can truncate
        # a `na_rep` longer than the widest value when assigned below --
        # confirm this is acceptable for all callers.
        values = np.asarray(self).astype(str)
    else:
        values = np.array(self, dtype=object, copy=True)

    values[mask] = na_rep
    return values

1036 

1037 def _summary(self, name=None): 

1038 """ 

1039 Return a summarized representation. 

1040 

1041 Parameters 

1042 ---------- 

1043 name : str 

1044 name to use in the summary representation 

1045 

1046 Returns 

1047 ------- 

1048 String with a summarized representation of the index 

1049 """ 

1050 if len(self) > 0: 

1051 head = self[0] 

1052 if hasattr(head, "format") and not isinstance(head, str): 

1053 head = head.format() 

1054 tail = self[-1] 

1055 if hasattr(tail, "format") and not isinstance(tail, str): 

1056 tail = tail.format() 

1057 index_summary = f", {head} to {tail}" 

1058 else: 

1059 index_summary = "" 

1060 

1061 if name is None: 

1062 name = type(self).__name__ 

1063 return f"{name}: {len(self)} entries{index_summary}" 

1064 

1065 # -------------------------------------------------------------------- 

1066 # Conversion Methods 

1067 

def to_flat_index(self):
    """
    Identity method.

    .. versionadded:: 0.24.0

    This is implemented for compatibility with subclass implementations
    when chaining.

    Returns
    -------
    pd.Index
        Caller.

    See Also
    --------
    MultiIndex.to_flat_index : Subclass implementation.
    """
    # A plain Index is already flat, so this is a no-op.
    return self

1087 

def to_series(self, index=None, name=None):
    """
    Create a Series with both index and values equal to the index keys.

    Useful with map for returning an indexer based on an index.

    Parameters
    ----------
    index : Index, optional
        Index of resulting Series. If None, defaults to original index.
    name : str, optional
        Name of resulting Series. If None, defaults to name of original
        index.

    Returns
    -------
    Series
        The dtype will be based on the type of the Index values.
    """

    from pandas import Series

    if index is None:
        # default: the Series is indexed by (a shallow copy of) self
        index = self._shallow_copy()
    if name is None:
        name = self.name

    # copy so mutating the Series does not write back into the Index
    return Series(self.values.copy(), index=index, name=name)

1116 

def to_frame(self, index=True, name=None):
    """
    Create a DataFrame with a column containing the Index.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    index : bool, default True
        Set the index of the returned DataFrame as the original Index.

    name : object, default None
        The passed name should substitute for the index name (if it has
        one).

    Returns
    -------
    DataFrame
        DataFrame containing the original Index data.

    See Also
    --------
    Index.to_series : Convert an Index to a Series.
    Series.to_frame : Convert Series to DataFrame.

    Examples
    --------
    >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
    >>> idx.to_frame()
           animal
    animal
    Ant       Ant
    Bear     Bear
    Cow       Cow

    By default, the original Index is reused. To enforce a new Index:

    >>> idx.to_frame(index=False)
        animal
    0   Ant
    1  Bear
    2   Cow

    To override the name of the resulting column, specify `name`:

    >>> idx.to_frame(index=False, name='zoo')
        zoo
    0   Ant
    1  Bear
    2   Cow
    """

    from pandas import DataFrame

    if name is None:
        # NOTE(review): `or 0` also maps falsy names such as "" to the
        # positional column label 0 -- confirm that is intended.
        name = self.name or 0
    result = DataFrame({name: self._values.copy()})

    if index:
        result.index = self
    return result

1178 

1179 # -------------------------------------------------------------------- 

1180 # Name-Centric Methods 

1181 

@property
def name(self):
    """Return the name of the Index (stored on the ``_name`` attribute)."""
    return self._name

1185 

@name.setter
def name(self, value):
    if self._no_setting_name:
        # Used in MultiIndex.levels to avoid silently ignoring name updates.
        raise RuntimeError(
            "Cannot set name on a level of a MultiIndex. Use "
            "'MultiIndex.set_names' instead."
        )
    # Called for its validation side effect only; the return value is
    # intentionally discarded.
    maybe_extract_name(value, None, type(self))
    self._name = value

1196 

def _validate_names(self, name=None, names=None, deep=False):
    """
    Handles the quirks of having a singular 'name' parameter for general
    Index and plural 'names' parameter for MultiIndex.
    """
    from copy import deepcopy

    have_name = name is not None
    have_names = names is not None

    if have_name and have_names:
        raise TypeError("Can only provide one of `names` and `name`")
    if not have_name and not have_names:
        # Neither given: hand back the existing names (copied if `deep`).
        return deepcopy(self.names) if deep else self.names
    if have_names:
        if not is_list_like(names):
            raise TypeError("Must pass list-like as `names`.")
        return names
    # Singular `name`: normalize a scalar to a one-element list.
    return name if is_list_like(name) else [name]

1216 

def _get_names(self):
    # Single-level Index: expose the one name as an immutable FrozenList.
    return FrozenList((self.name,))

1219 

def _set_names(self, values, level=None):
    """
    Set new names on index. Each name has to be a hashable type.

    Parameters
    ----------
    values : str or sequence
        name(s) to set
    level : int, level name, or sequence of int/level names (default None)
        If the index is a MultiIndex (hierarchical), level(s) to set (None
        for all levels). Otherwise level must be None

    Raises
    ------
    TypeError if each name is not hashable.
    """
    if not is_list_like(values):
        raise ValueError("Names must be a list-like")
    if len(values) != 1:
        raise ValueError(f"Length of new names must be 1, got {len(values)}")

    # GH 20527
    # All items in 'name' need to be hashable; with exactly one entry a
    # single check suffices.
    name = values[0]
    if not is_hashable(name):
        raise TypeError(f"{type(self).__name__}.name must be a hashable type")
    self._name = name

1247 

1248 names = property(fset=_set_names, fget=_get_names) 

1249 

def set_names(self, names, level=None, inplace=False):
    """
    Set Index or MultiIndex name.

    Able to set new names partially and by level.

    Parameters
    ----------
    names : label or list of label
        Name(s) to set.
    level : int, label or list of int or label, optional
        If the index is a MultiIndex, level(s) to set (None for all
        levels). Otherwise level must be None.
    inplace : bool, default False
        Modifies the object directly, instead of creating a new Index or
        MultiIndex.

    Returns
    -------
    Index
        The same type as the caller or None if inplace is True.

    See Also
    --------
    Index.rename : Able to set new names without level.

    Examples
    --------
    >>> idx = pd.Index([1, 2, 3, 4])
    >>> idx
    Int64Index([1, 2, 3, 4], dtype='int64')
    >>> idx.set_names('quarter')
    Int64Index([1, 2, 3, 4], dtype='int64', name='quarter')

    >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
    ...                                   [2018, 2019]])
    >>> idx
    MultiIndex([('python', 2018),
                ('python', 2019),
                ( 'cobra', 2018),
                ( 'cobra', 2019)],
               )
    >>> idx.set_names(['kind', 'year'], inplace=True)
    >>> idx
    MultiIndex([('python', 2018),
                ('python', 2019),
                ( 'cobra', 2018),
                ( 'cobra', 2019)],
               names=['kind', 'year'])
    >>> idx.set_names('species', level=0)
    MultiIndex([('python', 2018),
                ('python', 2019),
                ( 'cobra', 2018),
                ( 'cobra', 2019)],
               names=['species', 'year'])
    """

    if level is not None and not isinstance(self, ABCMultiIndex):
        raise ValueError("Level must be None for non-MultiIndex")

    if level is not None and not is_list_like(level) and is_list_like(names):
        raise TypeError("Names must be a string when a single level is provided.")

    if not is_list_like(names) and level is None and self.nlevels > 1:
        raise TypeError("Must pass list-like as `names`.")

    # normalize scalars to one-element lists before delegating
    if not is_list_like(names):
        names = [names]
    if level is not None and not is_list_like(level):
        level = [level]

    # operate on self when inplace, otherwise on a shallow copy
    if inplace:
        idx = self
    else:
        idx = self._shallow_copy()
    idx._set_names(names, level=level)
    if not inplace:
        return idx

1328 

def rename(self, name, inplace=False):
    """
    Alter Index or MultiIndex name.

    Able to set new names without level. Defaults to returning new index.
    Length of names must match number of levels in MultiIndex.

    Parameters
    ----------
    name : label or list of labels
        Name(s) to set.
    inplace : bool, default False
        Modifies the object directly, instead of creating a new Index or
        MultiIndex.

    Returns
    -------
    Index
        The same type as the caller or None if inplace is True.

    See Also
    --------
    Index.set_names : Able to set new names partially and by level.

    Examples
    --------
    >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
    >>> idx.rename('grade')
    Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')

    >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
    ...                                   [2018, 2019]],
    ...                                   names=['kind', 'year'])
    >>> idx
    MultiIndex([('python', 2018),
                ('python', 2019),
                ( 'cobra', 2018),
                ( 'cobra', 2019)],
               names=['kind', 'year'])
    >>> idx.rename(['species', 'year'])
    MultiIndex([('python', 2018),
                ('python', 2019),
                ( 'cobra', 2018),
                ( 'cobra', 2019)],
               names=['species', 'year'])
    >>> idx.rename('species')
    Traceback (most recent call last):
    TypeError: Must pass list-like as `names`.
    """
    # Thin wrapper: delegate to set_names with no level.
    return self.set_names([name], inplace=inplace)

1379 

1380 # -------------------------------------------------------------------- 

1381 # Level-Centric Methods 

1382 

@property
def nlevels(self) -> int:
    """
    Number of levels.

    Always 1 for a flat Index; MultiIndex overrides this.
    """
    return 1

1389 

def _sort_levels_monotonic(self):
    """
    Compat with MultiIndex.

    A flat Index has no levels to reorder, so this is a no-op.
    """
    return self

1395 

1396 def _validate_index_level(self, level): 

1397 """ 

1398 Validate index level. 

1399 

1400 For single-level Index getting level number is a no-op, but some 

1401 verification must be done like in MultiIndex. 

1402 

1403 """ 

1404 if isinstance(level, int): 

1405 if level < 0 and level != -1: 

1406 raise IndexError( 

1407 "Too many levels: Index has only 1 level, " 

1408 f"{level} is not a valid level number" 

1409 ) 

1410 elif level > 0: 

1411 raise IndexError( 

1412 f"Too many levels: Index has only 1 level, not {level + 1}" 

1413 ) 

1414 elif level != self.name: 

1415 raise KeyError( 

1416 f"Requested level ({level}) does not match index name ({self.name})" 

1417 ) 

1418 

def _get_level_number(self, level):
    # After validation, the single level of a flat Index is number 0.
    self._validate_index_level(level)
    return 0

1422 

def sortlevel(self, level=None, ascending=True, sort_remaining=None):
    """
    For internal compatibility with the Index API.

    Sort the Index. This is for compat with MultiIndex

    Parameters
    ----------
    ascending : bool, default True
        False to sort in descending order

    level, sort_remaining are compat parameters

    Returns
    -------
    Index
    """
    # `level` and `sort_remaining` are accepted but ignored here.
    # NOTE(review): with return_indexer=True this presumably returns a
    # (sorted_index, indexer) pair rather than just an Index -- confirm
    # against sort_values.
    return self.sort_values(return_indexer=True, ascending=ascending)

1441 

def _get_level_values(self, level):
    """
    Return an Index of values for requested level.

    This is primarily useful to get an individual level of values from a
    MultiIndex, but is provided on Index as well for compatibility.

    Parameters
    ----------
    level : int or str
        It is either the integer position or the name of the level.

    Returns
    -------
    Index
        Calling object, as there is only one level in the Index.

    See Also
    --------
    MultiIndex.get_level_values : Get values for a level of a MultiIndex.

    Notes
    -----
    For Index, level should be 0, since there are no multiple levels.

    Examples
    --------

    >>> idx = pd.Index(list('abc'))
    >>> idx
    Index(['a', 'b', 'c'], dtype='object')

    Get level values by supplying `level` as integer:

    >>> idx.get_level_values(0)
    Index(['a', 'b', 'c'], dtype='object')
    """
    # validation raises for any level other than 0 / -1 / self.name
    self._validate_index_level(level)
    return self

1481 

1482 get_level_values = _get_level_values 

1483 

def droplevel(self, level=0):
    """
    Return index with requested level(s) removed.

    If resulting index has only 1 level left, the result will be
    of Index type, not MultiIndex.

    .. versionadded:: 0.23.1 (support for non-MultiIndex)

    Parameters
    ----------
    level : int, str, or list-like, default 0
        If a string is given, must be the name of a level
        If list-like, elements must be names or indexes of levels.

    Returns
    -------
    Index or MultiIndex
    """
    if not isinstance(level, (tuple, list)):
        level = [level]

    # drop highest level numbers first so earlier pops don't shift
    # the positions of levels still to be removed
    levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]

    if len(level) == 0:
        return self
    if len(level) >= self.nlevels:
        raise ValueError(
            f"Cannot remove {len(level)} levels from an index with {self.nlevels} "
            "levels: at least one level must be left."
        )
    # The two checks above guarantee that here self is a MultiIndex

    new_levels = list(self.levels)
    new_codes = list(self.codes)
    new_names = list(self.names)

    for i in levnums:
        new_levels.pop(i)
        new_codes.pop(i)
        new_names.pop(i)

    if len(new_levels) == 1:

        # set nan if needed (code -1 marks a missing entry)
        mask = new_codes[0] == -1
        result = new_levels[0].take(new_codes[0])
        if mask.any():
            result = result.putmask(mask, np.nan)

        result._name = new_names[0]
        return result
    else:
        from pandas.core.indexes.multi import MultiIndex

        # levels/codes already consistent, so skip integrity checks
        return MultiIndex(
            levels=new_levels,
            codes=new_codes,
            names=new_names,
            verify_integrity=False,
        )

1545 

# Shared docstring, attached to implementations via @Appender below.
_index_shared_docs[
    "_get_grouper_for_level"
] = """
    Get index grouper corresponding to an index level

    Parameters
    ----------
    mapper: Group mapping function or None
        Function mapping index values to groups
    level : int or None
        Index level

    Returns
    -------
    grouper : Index
        Index of values to group on.
    labels : ndarray of int or None
        Array of locations in level_index.
    uniques : Index or None
        Index of unique values for level.
    """

1567 

@Appender(_index_shared_docs["_get_grouper_for_level"])
def _get_grouper_for_level(self, mapper, level=None):
    # A flat Index only has level 0; anything else is a caller bug.
    assert level is None or level == 0
    grouper = self if mapper is None else self.map(mapper)
    # labels/uniques only apply to MultiIndex levels, hence None here.
    return grouper, None, None

1577 

1578 # -------------------------------------------------------------------- 

1579 # Introspection Methods 

1580 

@property
def is_monotonic(self) -> bool:
    """
    Alias for is_monotonic_increasing.
    """
    return self.is_monotonic_increasing

1587 

@property
def is_monotonic_increasing(self):
    """
    Return if the index is monotonic increasing (only equal or
    increasing) values.

    Delegates to the cached index engine.

    Examples
    --------
    >>> Index([1, 2, 3]).is_monotonic_increasing
    True
    >>> Index([1, 2, 2]).is_monotonic_increasing
    True
    >>> Index([1, 3, 2]).is_monotonic_increasing
    False
    """
    return self._engine.is_monotonic_increasing

1604 

@property
def is_monotonic_decreasing(self) -> bool:
    """
    Return if the index is monotonic decreasing (only equal or
    decreasing) values.

    Delegates to the cached index engine.

    Examples
    --------
    >>> Index([3, 2, 1]).is_monotonic_decreasing
    True
    >>> Index([3, 2, 2]).is_monotonic_decreasing
    True
    >>> Index([3, 1, 2]).is_monotonic_decreasing
    False
    """
    return self._engine.is_monotonic_decreasing

1621 

@property
def _is_strictly_monotonic_increasing(self) -> bool:
    """
    Return if the index is strictly monotonic increasing
    (only increasing) values.

    Strictness = monotonic increase with no duplicates.

    Examples
    --------
    >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
    True
    >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
    False
    >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
    False
    """
    return self.is_unique and self.is_monotonic_increasing

1638 

@property
def _is_strictly_monotonic_decreasing(self) -> bool:
    """
    Return if the index is strictly monotonic decreasing
    (only decreasing) values.

    Strictness = monotonic decrease with no duplicates.

    Examples
    --------
    >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
    True
    >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
    False
    >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
    False
    """
    return self.is_unique and self.is_monotonic_decreasing

1655 

@cache_readonly
def is_unique(self) -> bool:
    """
    Return if the index has unique values.

    Delegates to (and is cached alongside) the index engine.
    """
    return self._engine.is_unique

1662 

@property
def has_duplicates(self) -> bool:
    """
    Return True if the index contains duplicate values (inverse of
    ``is_unique``).
    """
    return not self.is_unique

1666 

def is_boolean(self) -> bool:
    """Return True if the inferred type of the values is boolean."""
    return self.inferred_type in ["boolean"]

1669 

def is_integer(self) -> bool:
    """Return True if the inferred type of the values is integer."""
    return self.inferred_type in ["integer"]

1672 

def is_floating(self) -> bool:
    """Return True if the values are floats or mixed integer/float/NA."""
    return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]

1675 

def is_numeric(self) -> bool:
    """Return True if the inferred type is purely integer or floating."""
    return self.inferred_type in ["integer", "floating"]

1678 

def is_object(self) -> bool:
    """Return True if the index dtype is object (checks dtype, not inference)."""
    return is_object_dtype(self.dtype)

1681 

def is_categorical(self) -> bool:
    """
    Check if the Index holds categorical data.

    Returns
    -------
    boolean
        True if the Index is categorical.

    See Also
    --------
    CategoricalIndex : Index for categorical data.

    Examples
    --------
    >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
    ...                 "Watermelon"]).astype("category")
    >>> idx.is_categorical()
    True

    >>> idx = pd.Index([1, 3, 5, 7])
    >>> idx.is_categorical()
    False

    >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
    >>> s
    0        Peter
    1       Victor
    2    Elisabeth
    3          Mar
    dtype: object
    >>> s.index.is_categorical()
    False
    """
    # based on inferred type, consistent with the sibling predicates
    return self.inferred_type in ["categorical"]

1717 

def is_interval(self) -> bool:
    """Return True if the inferred type of the values is interval."""
    return self.inferred_type in ["interval"]

1720 

def is_mixed(self) -> bool:
    """Return True if the inferred type of the values is mixed."""
    return self.inferred_type in ["mixed"]

1723 

def holds_integer(self):
    """
    Whether the type is an integer type.

    Unlike ``is_integer``, this also accepts mixed-integer inference.
    """
    return self.inferred_type in ["integer", "mixed-integer"]

1729 

@cache_readonly
def inferred_type(self):
    """
    Return a string of the type inferred from the values.

    Cached; NaNs participate in inference (skipna=False).
    """
    return lib.infer_dtype(self, skipna=False)

1736 

@cache_readonly
def is_all_dates(self) -> bool:
    """Return True (cached) if every value is datetime-like."""
    return is_datetime_array(ensure_object(self.values))

1740 

1741 # -------------------------------------------------------------------- 

1742 # Pickle Methods 

1743 

def __reduce__(self):
    # Pickle as (_new_Index, (cls, attrs)) so unpickling rebuilds the
    # Index through the same constructor path as normal creation.
    d = dict(data=self._data)
    d.update(self._get_attributes_dict())
    return _new_Index, (type(self), d), None

1748 

1749 # -------------------------------------------------------------------- 

1750 # Null Handling Methods 

1751 

# Class-level sentinel; subclasses override (e.g. NaT for datetime-likes).
_na_value = np.nan
"""The expected NA value to use with this index."""

1754 

@cache_readonly
def _isnan(self):
    """
    Return if each value is NaN.

    Returns a boolean ndarray the same length as the index; all-False
    when the dtype cannot hold NA at all.
    """
    if self._can_hold_na:
        return isna(self)
    else:
        # shouldn't reach to this condition by checking hasnans beforehand
        # np.zeros replaces the previous np.empty + fill(False) two-step.
        return np.zeros(len(self), dtype=np.bool_)

1767 

@cache_readonly
def _nan_idxs(self):
    # Integer positions of NA entries; empty when NA cannot occur.
    if self._can_hold_na:
        return self._isnan.nonzero()[0]
    else:
        return np.array([], dtype=np.int64)

1774 

@cache_readonly
def hasnans(self):
    """
    Return if I have any nans; enables various perf speedups.
    """
    if self._can_hold_na:
        return bool(self._isnan.any())
    else:
        # dtypes that cannot hold NA trivially contain none
        return False

1784 

def isna(self):
    """
    Detect missing values.

    Return a boolean same-sized object indicating if the values are NA.
    NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
    mapped to ``True`` values.
    Everything else get mapped to ``False`` values. Characters such as
    empty strings `''` or :attr:`numpy.inf` are not considered NA values
    (unless you set ``pandas.options.mode.use_inf_as_na = True``).

    Returns
    -------
    numpy.ndarray
        A boolean array of whether my values are NA.

    See Also
    --------
    Index.notna : Boolean inverse of isna.
    Index.dropna : Omit entries with missing values.
    isna : Top-level isna.
    Series.isna : Detect missing values in Series object.

    Examples
    --------
    Show which entries in a pandas.Index are NA. The result is an
    array.

    >>> idx = pd.Index([5.2, 6.0, np.NaN])
    >>> idx
    Float64Index([5.2, 6.0, nan], dtype='float64')
    >>> idx.isna()
    array([False, False, True], dtype=bool)

    Empty strings are not considered NA values. None is considered an NA
    value.

    >>> idx = pd.Index(['black', '', 'red', None])
    >>> idx
    Index(['black', '', 'red', None], dtype='object')
    >>> idx.isna()
    array([False, False, False, True], dtype=bool)

    For datetimes, `NaT` (Not a Time) is considered as an NA value.

    >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
    ...                         pd.Timestamp(''), None, pd.NaT])
    >>> idx
    DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
                  dtype='datetime64[ns]', freq=None)
    >>> idx.isna()
    array([False, True, True, True], dtype=bool)
    """
    # cached mask computed in _isnan
    return self._isnan

# Alias kept for backwards compatibility.
isnull = isna

1841 

def notna(self):
    """
    Detect existing (non-missing) values.

    Return a boolean same-sized object indicating if the values are not NA.
    Non-missing values get mapped to ``True``. Characters such as empty
    strings ``''`` or :attr:`numpy.inf` are not considered NA values
    (unless you set ``pandas.options.mode.use_inf_as_na = True``).
    NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
    values.

    Returns
    -------
    numpy.ndarray
        Boolean array to indicate which entries are not NA.

    See Also
    --------
    Index.notnull : Alias of notna.
    Index.isna: Inverse of notna.
    notna : Top-level notna.

    Examples
    --------
    Show which entries in an Index are not NA. The result is an
    array.

    >>> idx = pd.Index([5.2, 6.0, np.NaN])
    >>> idx
    Float64Index([5.2, 6.0, nan], dtype='float64')
    >>> idx.notna()
    array([ True,  True, False])

    Empty strings are not considered NA values. None is considered a NA
    value.

    >>> idx = pd.Index(['black', '', 'red', None])
    >>> idx
    Index(['black', '', 'red', None], dtype='object')
    >>> idx.notna()
    array([ True,  True,  True, False])
    """
    # simply the boolean inverse of the cached NA mask
    return ~self.isna()

# Alias kept for backwards compatibility.
notnull = notna

1887 

# Shared docstring, attached to implementations via @Appender below.
_index_shared_docs[
    "fillna"
] = """
    Fill NA/NaN values with the specified value.

    Parameters
    ----------
    value : scalar
        Scalar value to use to fill holes (e.g. 0).
        This value cannot be a list-likes.
    downcast : dict, default is None
        a dict of item->dtype of what to downcast if possible,
        or the string 'infer' which will try to downcast to an appropriate
        equal type (e.g. float64 to int64 if possible).

    Returns
    -------
    filled : Index
    """

1907 

@Appender(_index_shared_docs["fillna"])
def fillna(self, value=None, downcast=None):
    # raises if `value` is not a valid fill value for this index type
    self._assert_can_do_op(value)
    if self.hasnans:
        result = self.putmask(self._isnan, value)
        if downcast is None:
            # no need to care metadata other than name
            # because it can't have freq if it has NaNs
            return Index(result, name=self.name)
    # NOTE(review): when hasnans and downcast is not None, control falls
    # through here and returns an UNFILLED shallow copy, silently
    # discarding `result` -- confirm this is intended.
    return self._shallow_copy()

1918 

# Shared docstring, attached to implementations via @Appender below.
_index_shared_docs[
    "dropna"
] = """
    Return Index without NA/NaN values.

    Parameters
    ----------
    how : {'any', 'all'}, default 'any'
        If the Index is a MultiIndex, drop the value when any or all levels
        are NaN.

    Returns
    -------
    valid : Index
    """

1934 

@Appender(_index_shared_docs["dropna"])
def dropna(self, how="any"):
    if how not in ("any", "all"):
        raise ValueError(f"invalid how option: {how}")

    if not self.hasnans:
        # Nothing to drop; still return a copy for consistency.
        return self._shallow_copy()
    return self._shallow_copy(self._values[~self._isnan])

1943 

1944 # -------------------------------------------------------------------- 

1945 # Uniqueness Methods 

1946 

# Shared docstring, substituted and attached via @Appender below.
_index_shared_docs[
    "index_unique"
] = """
    Return unique values in the index. Uniques are returned in order
    of appearance, this does NOT sort.

    Parameters
    ----------
    level : int or str, optional, default None
        Only return values from specified level (for MultiIndex).

        .. versionadded:: 0.23.0

    Returns
    -------
    Index without duplicates

    See Also
    --------
    unique
    Series.unique
    """

1969 

@Appender(_index_shared_docs["index_unique"] % _index_doc_kwargs)
def unique(self, level=None):
    if level is not None:
        # a flat Index only accepts level 0 / -1 / its own name
        self._validate_index_level(level)
    result = super().unique()
    # re-wrap the ndarray of uniques as an Index of the same type
    return self._shallow_copy(result)

1976 

def drop_duplicates(self, keep="first"):
    """
    Return Index with duplicate values removed.

    Parameters
    ----------
    keep : {'first', 'last', ``False``}, default 'first'
        - 'first' : Drop duplicates except for the first occurrence.
        - 'last' : Drop duplicates except for the last occurrence.
        - ``False`` : Drop all duplicates.

    Returns
    -------
    deduplicated : Index

    See Also
    --------
    Series.drop_duplicates : Equivalent method on Series.
    DataFrame.drop_duplicates : Equivalent method on DataFrame.
    Index.duplicated : Related method on Index, indicating duplicate
        Index values.

    Examples
    --------
    Generate an pandas.Index with duplicate values.

    >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])

    The `keep` parameter controls  which duplicate values are removed.
    The value 'first' keeps the first occurrence for each
    set of duplicated entries. The default value of keep is 'first'.

    >>> idx.drop_duplicates(keep='first')
    Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')

    The value 'last' keeps the last occurrence for each set of duplicated
    entries.

    >>> idx.drop_duplicates(keep='last')
    Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')

    The value ``False`` discards all sets of duplicated entries.

    >>> idx.drop_duplicates(keep=False)
    Index(['cow', 'beetle', 'hippo'], dtype='object')
    """
    # implementation lives on the shared IndexOpsMixin base
    return super().drop_duplicates(keep=keep)

2024 

def duplicated(self, keep="first"):
    """
    Indicate duplicate index values.

    Duplicated values are indicated as ``True`` values in the resulting
    array. Either all duplicates, all except the first, or all except the
    last occurrence of duplicates can be indicated.

    Parameters
    ----------
    keep : {'first', 'last', False}, default 'first'
        The value or values in a set of duplicates to mark as missing.

        - 'first' : Mark duplicates as ``True`` except for the first
          occurrence.
        - 'last' : Mark duplicates as ``True`` except for the last
          occurrence.
        - ``False`` : Mark all duplicates as ``True``.

    Returns
    -------
    numpy.ndarray

    See Also
    --------
    Series.duplicated : Equivalent method on pandas.Series.
    DataFrame.duplicated : Equivalent method on pandas.DataFrame.
    Index.drop_duplicates : Remove duplicate values from Index.

    Examples
    --------
    By default, for each set of duplicated values, the first occurrence is
    set to False and all others to True:

    >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
    >>> idx.duplicated()
    array([False, False,  True, False,  True])

    which is equivalent to

    >>> idx.duplicated(keep='first')
    array([False, False,  True, False,  True])

    By using 'last', the last occurrence of each set of duplicated values
    is set on False and all others on True:

    >>> idx.duplicated(keep='last')
    array([ True, False,  True, False, False])

    By setting keep on ``False``, all duplicates are True:

    >>> idx.duplicated(keep=False)
    array([ True, False,  True, False,  True])
    """
    # implementation lives on the shared IndexOpsMixin base
    return super().duplicated(keep=keep)

2080 

2081 def _get_unique_index(self, dropna=False): 

2082 """ 

2083 Returns an index containing unique values. 

2084 

2085 Parameters 

2086 ---------- 

2087 dropna : bool 

2088 If True, NaN values are dropped. 

2089 

2090 Returns 

2091 ------- 

2092 uniques : index 

2093 """ 

2094 if self.is_unique and not dropna: 

2095 return self 

2096 

2097 values = self.values 

2098 

2099 if not self.is_unique: 

2100 values = self.unique() 

2101 

2102 if dropna: 

2103 try: 

2104 if self.hasnans: 

2105 values = values[~isna(values)] 

2106 except NotImplementedError: 

2107 pass 

2108 

2109 return self._shallow_copy(values) 

2110 

2111 # -------------------------------------------------------------------- 

2112 # Arithmetic & Logical Methods 

2113 

2114 def __add__(self, other): 

2115 if isinstance(other, (ABCSeries, ABCDataFrame)): 

2116 return NotImplemented 

2117 from pandas import Series 

2118 

2119 return Index(Series(self) + other) 

2120 

2121 def __radd__(self, other): 

2122 from pandas import Series 

2123 

2124 return Index(other + Series(self)) 

2125 

2126 def __iadd__(self, other): 

2127 # alias for __add__ 

2128 return self + other 

2129 

2130 def __sub__(self, other): 

2131 return Index(np.array(self) - other) 

2132 

2133 def __rsub__(self, other): 

2134 # wrap Series to ensure we pin name correctly 

2135 from pandas import Series 

2136 

2137 return Index(other - Series(self)) 

2138 

2139 def __and__(self, other): 

2140 return self.intersection(other) 

2141 

2142 def __or__(self, other): 

2143 return self.union(other) 

2144 

2145 def __xor__(self, other): 

2146 return self.symmetric_difference(other) 

2147 

2148 def __nonzero__(self): 

2149 raise ValueError( 

2150 f"The truth value of a {type(self).__name__} is ambiguous. " 

2151 "Use a.empty, a.bool(), a.item(), a.any() or a.all()." 

2152 ) 

2153 

2154 __bool__ = __nonzero__ 

2155 

2156 # -------------------------------------------------------------------- 

2157 # Set Operation Methods 

2158 

2159 def _get_reconciled_name_object(self, other): 

2160 """ 

2161 If the result of a set operation will be self, 

2162 return self, unless the name changes, in which 

2163 case make a shallow copy of self. 

2164 """ 

2165 name = get_op_result_name(self, other) 

2166 if self.name != name: 

2167 return self._shallow_copy(name=name) 

2168 return self 

2169 

2170 def _union_incompatible_dtypes(self, other, sort): 

2171 """ 

2172 Casts this and other index to object dtype to allow the formation 

2173 of a union between incompatible types. 

2174 

2175 Parameters 

2176 ---------- 

2177 other : Index or array-like 

2178 sort : False or None, default False 

2179 Whether to sort the resulting index. 

2180 

2181 * False : do not sort the result. 

2182 * None : sort the result, except when `self` and `other` are equal 

2183 or when the values cannot be compared. 

2184 

2185 Returns 

2186 ------- 

2187 Index 

2188 """ 

2189 this = self.astype(object, copy=False) 

2190 # cast to Index for when `other` is list-like 

2191 other = Index(other).astype(object, copy=False) 

2192 return Index.union(this, other, sort=sort).astype(object, copy=False) 

2193 

2194 def _is_compatible_with_other(self, other): 

2195 """ 

2196 Check whether this and the other dtype are compatible with each other. 

2197 Meaning a union can be formed between them without needing to be cast 

2198 to dtype object. 

2199 

2200 Parameters 

2201 ---------- 

2202 other : Index or array-like 

2203 

2204 Returns 

2205 ------- 

2206 bool 

2207 """ 

2208 return type(self) is type(other) and is_dtype_equal(self.dtype, other.dtype) 

2209 

2210 def _validate_sort_keyword(self, sort): 

2211 if sort not in [None, False]: 

2212 raise ValueError( 

2213 "The 'sort' keyword only takes the values of " 

2214 f"None or False; {sort} was passed." 

2215 ) 

2216 

2217 def union(self, other, sort=None): 

2218 """ 

2219 Form the union of two Index objects. 

2220 

2221 If the Index objects are incompatible, both Index objects will be 

2222 cast to dtype('object') first. 

2223 

2224 .. versionchanged:: 0.25.0 

2225 

2226 Parameters 

2227 ---------- 

2228 other : Index or array-like 

2229 sort : bool or None, default None 

2230 Whether to sort the resulting Index. 

2231 

2232 * None : Sort the result, except when 

2233 

2234 1. `self` and `other` are equal. 

2235 2. `self` or `other` has length 0. 

2236 3. Some values in `self` or `other` cannot be compared. 

2237 A RuntimeWarning is issued in this case. 

2238 

2239 * False : do not sort the result. 

2240 

2241 .. versionadded:: 0.24.0 

2242 

2243 .. versionchanged:: 0.24.1 

2244 

2245 Changed the default value from ``True`` to ``None`` 

2246 (without change in behaviour). 

2247 

2248 Returns 

2249 ------- 

2250 union : Index 

2251 

2252 Examples 

2253 -------- 

2254 

2255 Union matching dtypes 

2256 

2257 >>> idx1 = pd.Index([1, 2, 3, 4]) 

2258 >>> idx2 = pd.Index([3, 4, 5, 6]) 

2259 >>> idx1.union(idx2) 

2260 Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') 

2261 

2262 Union mismatched dtypes 

2263 

2264 >>> idx1 = pd.Index(['a', 'b', 'c', 'd']) 

2265 >>> idx2 = pd.Index([1, 2, 3, 4]) 

2266 >>> idx1.union(idx2) 

2267 Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object') 

2268 """ 

2269 self._validate_sort_keyword(sort) 

2270 self._assert_can_do_setop(other) 

2271 

2272 if not self._is_compatible_with_other(other): 

2273 return self._union_incompatible_dtypes(other, sort=sort) 

2274 

2275 return self._union(other, sort=sort) 

2276 

    def _union(self, other, sort):
        """
        Specific union logic should go here. In subclasses, union behavior
        should be overwritten here rather than in `self.union`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        Index
        """

        if not len(other) or self.equals(other):
            # `other` contributes nothing: reuse self, renaming if needed.
            return self._get_reconciled_name_object(other)

        if not len(self):
            # self contributes nothing: reuse other, renaming if needed.
            return other._get_reconciled_name_object(self)

        # TODO(EA): setops-refactor, clean all this up
        # For tz-aware datetimes use the underlying ndarray values so the
        # indexer/concat machinery below operates on plain arrays.
        if is_datetime64tz_dtype(self):
            lvals = self._ndarray_values
        else:
            lvals = self._values
        if is_datetime64tz_dtype(other):
            rvals = other._ndarray_values
        else:
            rvals = other._values

        if sort is None and self.is_monotonic and other.is_monotonic:
            # Fast path: both sides sorted, merge via the outer indexer.
            try:
                result = self._outer_indexer(lvals, rvals)[0]
            except TypeError:
                # incomparable objects
                result = list(lvals)

                # worth making this faster? a very unusual case
                value_set = set(lvals)
                result.extend([x for x in rvals if x not in value_set])
        else:
            # find indexes of things in "other" that are not in "self"
            if self.is_unique:
                indexer = self.get_indexer(other)
                indexer = (indexer == -1).nonzero()[0]
            else:
                indexer = algos.unique1d(self.get_indexer_non_unique(other)[1])

            if len(indexer) > 0:
                other_diff = algos.take_nd(rvals, indexer, allow_fill=False)
                result = concat_compat((lvals, other_diff))

            else:
                # Everything in `other` is already in self.
                result = lvals

            if sort is None:
                # Best-effort sort; incomparable values leave the result in
                # concatenation order and emit a RuntimeWarning.
                try:
                    result = algos.safe_sort(result)
                except TypeError as err:
                    warnings.warn(
                        f"{err}, sort order is undefined for incomparable objects",
                        RuntimeWarning,
                        stacklevel=3,
                    )

        # for subclasses
        return self._wrap_setop_result(other, result)

2350 

2351 def _wrap_setop_result(self, other, result): 

2352 return self._constructor(result, name=get_op_result_name(self, other)) 

2353 

    # Shared docstring template, attached to `intersection` via @Appender.
    _index_shared_docs[
        "intersection"
    ] = """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and `other`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default from ``True`` to ``False``, to match
               the behaviour of 0.23.4 and earlier.

        Returns
        -------
        intersection : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Int64Index([3, 4], dtype='int64')
        """

    # TODO: standardize return type of non-union setops type(self vs other)
    @Appender(_index_shared_docs["intersection"])
    def intersection(self, other, sort=False):
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other = ensure_index(other)

        if self.equals(other):
            # Identical contents: reuse self, renaming if needed.
            return self._get_reconciled_name_object(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            # Mixed dtypes intersect in object space.
            this = self.astype("O")
            other = other.astype("O")
            return this.intersection(other, sort=sort)

        # TODO(EA): setops-refactor, clean all this up
        lvals = self._values
        rvals = other._values

        if self.is_monotonic and other.is_monotonic:
            # Fast path: both sides sorted, merge via the inner indexer.
            # TypeError (incomparable values) falls through to the slow path.
            try:
                result = self._inner_indexer(lvals, rvals)[0]
                return self._wrap_setop_result(other, result)
            except TypeError:
                pass

        try:
            indexer = Index(rvals).get_indexer(lvals)
            indexer = indexer.take((indexer != -1).nonzero()[0])
        except (InvalidIndexError, IncompatibleFrequency):
            # InvalidIndexError raised by get_indexer if non-unique
            # IncompatibleFrequency raised by PeriodIndex.get_indexer
            indexer = algos.unique1d(Index(rvals).get_indexer_non_unique(lvals)[0])
            indexer = indexer[indexer != -1]

        # Note: positions are taken from `other`, not self.
        taken = other.take(indexer)
        res_name = get_op_result_name(self, other)

        if sort is None:
            taken = algos.safe_sort(taken.values)
            return self._shallow_copy(taken, name=res_name)

        taken.name = res_name
        return taken

2435 

2436 def difference(self, other, sort=None): 

2437 """ 

2438 Return a new Index with elements from the index that are not in 

2439 `other`. 

2440 

2441 This is the set difference of two Index objects. 

2442 

2443 Parameters 

2444 ---------- 

2445 other : Index or array-like 

2446 sort : False or None, default None 

2447 Whether to sort the resulting index. By default, the 

2448 values are attempted to be sorted, but any TypeError from 

2449 incomparable elements is caught by pandas. 

2450 

2451 * None : Attempt to sort the result, but catch any TypeErrors 

2452 from comparing incomparable elements. 

2453 * False : Do not sort the result. 

2454 

2455 .. versionadded:: 0.24.0 

2456 

2457 .. versionchanged:: 0.24.1 

2458 

2459 Changed the default value from ``True`` to ``None`` 

2460 (without change in behaviour). 

2461 

2462 Returns 

2463 ------- 

2464 difference : Index 

2465 

2466 Examples 

2467 -------- 

2468 

2469 >>> idx1 = pd.Index([2, 1, 3, 4]) 

2470 >>> idx2 = pd.Index([3, 4, 5, 6]) 

2471 >>> idx1.difference(idx2) 

2472 Int64Index([1, 2], dtype='int64') 

2473 >>> idx1.difference(idx2, sort=False) 

2474 Int64Index([2, 1], dtype='int64') 

2475 """ 

2476 self._validate_sort_keyword(sort) 

2477 self._assert_can_do_setop(other) 

2478 

2479 if self.equals(other): 

2480 # pass an empty np.ndarray with the appropriate dtype 

2481 return self._shallow_copy(self._data[:0]) 

2482 

2483 other, result_name = self._convert_can_do_setop(other) 

2484 

2485 this = self._get_unique_index() 

2486 

2487 indexer = this.get_indexer(other) 

2488 indexer = indexer.take((indexer != -1).nonzero()[0]) 

2489 

2490 label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) 

2491 the_diff = this.values.take(label_diff) 

2492 if sort is None: 

2493 try: 

2494 the_diff = algos.safe_sort(the_diff) 

2495 except TypeError: 

2496 pass 

2497 

2498 return this._shallow_copy(the_diff, name=result_name) 

2499 

    def symmetric_difference(self, other, result_name=None, sort=None):
        """
        Compute the symmetric difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        result_name : str
        sort : False or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default value from ``True`` to ``None``
               (without change in behaviour).

        Returns
        -------
        symmetric_difference : Index

        Notes
        -----
        ``symmetric_difference`` contains elements that appear in either
        ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
        ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
        dropped.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([2, 3, 4, 5])
        >>> idx1.symmetric_difference(idx2)
        Int64Index([1, 5], dtype='int64')

        You can also use the ``^`` operator:

        >>> idx1 ^ idx2
        Int64Index([1, 5], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        # An explicit result_name argument wins over the derived one.
        other, result_name_update = self._convert_can_do_setop(other)
        if result_name is None:
            result_name = result_name_update

        # Work on deduplicated views of both sides.
        this = self._get_unique_index()
        other = other._get_unique_index()
        indexer = this.get_indexer(other)

        # {this} minus {other}
        common_indexer = indexer.take((indexer != -1).nonzero()[0])
        left_indexer = np.setdiff1d(
            np.arange(this.size), common_indexer, assume_unique=True
        )
        left_diff = this._values.take(left_indexer)

        # {other} minus {this}
        right_indexer = (indexer == -1).nonzero()[0]
        right_diff = other._values.take(right_indexer)

        the_diff = concat_compat([left_diff, right_diff])
        if sort is None:
            # Best-effort sort; incomparable values stay in concat order.
            try:
                the_diff = algos.safe_sort(the_diff)
            except TypeError:
                pass

        attribs = self._get_attributes_dict()
        attribs["name"] = result_name
        if "freq" in attribs:
            # A symmetric difference generally breaks any regular frequency.
            attribs["freq"] = None
        return self._shallow_copy_with_infer(the_diff, **attribs)

2580 

2581 def _assert_can_do_setop(self, other): 

2582 if not is_list_like(other): 

2583 raise TypeError("Input must be Index or array-like") 

2584 return True 

2585 

2586 def _convert_can_do_setop(self, other): 

2587 if not isinstance(other, Index): 

2588 other = Index(other, name=self.name) 

2589 result_name = self.name 

2590 else: 

2591 result_name = get_op_result_name(self, other) 

2592 return other, result_name 

2593 

2594 # -------------------------------------------------------------------- 

2595 # Indexing Methods 

2596 

    # Shared docstring template, attached to `get_loc` via @Appender.
    _index_shared_docs[
        "get_loc"
    ] = """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        tolerance : int or float, optional
            Maximum distance from index value for inexact matches. The value of
            the index at the matching location most satisfy the equation
            ``abs(index[loc] - key) <= tolerance``.

            .. versionadded:: 0.21.0 (list-like tolerance)

        Returns
        -------
        loc : int if unique index, slice if monotonic index, else mask

        Examples
        --------
        >>> unique_index = pd.Index(list('abc'))
        >>> unique_index.get_loc('b')
        1

        >>> monotonic_index = pd.Index(list('abbc'))
        >>> monotonic_index.get_loc('b')
        slice(1, 3, None)

        >>> non_monotonic_index = pd.Index(list('abcb'))
        >>> non_monotonic_index.get_loc('b')
        array([False,  True, False,  True], dtype=bool)
        """

    @Appender(_index_shared_docs["get_loc"])
    def get_loc(self, key, method=None, tolerance=None):
        if method is None:
            if tolerance is not None:
                raise ValueError(
                    "tolerance argument only valid if using pad, "
                    "backfill or nearest lookups"
                )
            try:
                return self._engine.get_loc(key)
            except KeyError:
                # Retry once with a possibly-cast key before giving up.
                return self._engine.get_loc(self._maybe_cast_indexer(key))
        # Inexact lookup: delegate to get_indexer with a one-element target
        # and unwrap the scalar position.
        indexer = self.get_indexer([key], method=method, tolerance=tolerance)
        if indexer.ndim > 1 or indexer.size > 1:
            raise TypeError("get_loc requires scalar valued input")
        loc = indexer.item()
        if loc == -1:
            # -1 is get_indexer's not-found sentinel.
            raise KeyError(key)
        return loc

2656 

    # Shared docstring template, attached to `get_indexer` via @Appender.
    _index_shared_docs[
        "get_indexer"
    ] = """
        Compute indexer and mask for new index given the current index. The
        indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations most
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

            .. versionadded:: 0.21.0 (list-like tolerance)

        Returns
        -------
        indexer : ndarray of int
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        %(raises_section)s
        Examples
        --------
        >>> index = pd.Index(['c', 'a', 'b'])
        >>> index.get_indexer(['a', 'b', 'x'])
        array([ 1,  2, -1])

        Notice that the return value is an array of locations in ``index``
        and ``x`` is marked by -1, as it is not in ``index``.
        """

    @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
    def get_indexer(self, target, method=None, limit=None, tolerance=None):
        method = missing.clean_reindex_fill_method(method)
        target = ensure_index(target)
        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance, target)

        # Treat boolean labels passed to a numeric index as not found. Without
        # this fix False and True would be treated as 0 and 1 respectively.
        # (GH #16877)
        if target.is_boolean() and self.is_numeric():
            return ensure_platform_int(np.repeat(-1, target.size))

        # If dtype promotion changed either side, redo the lookup on the
        # promoted objects.
        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            return pself.get_indexer(
                ptarget, method=method, limit=limit, tolerance=tolerance
            )

        if not is_dtype_equal(self.dtype, target.dtype):
            # Mixed dtypes are matched in object space.
            this = self.astype(object)
            target = target.astype(object)
            return this.get_indexer(
                target, method=method, limit=limit, tolerance=tolerance
            )

        if not self.is_unique:
            raise InvalidIndexError(
                "Reindexing only valid with uniquely valued Index objects"
            )

        if method == "pad" or method == "backfill":
            indexer = self._get_fill_indexer(target, method, limit, tolerance)
        elif method == "nearest":
            indexer = self._get_nearest_indexer(target, limit, tolerance)
        else:
            # Exact matching: tolerance/limit are only meaningful for the
            # inexact methods above.
            if tolerance is not None:
                raise ValueError(
                    "tolerance argument only valid if doing pad, "
                    "backfill or nearest reindexing"
                )
            if limit is not None:
                raise ValueError(
                    "limit argument only valid if doing pad, "
                    "backfill or nearest reindexing"
                )

            indexer = self._engine.get_indexer(target._ndarray_values)

        return ensure_platform_int(indexer)

2756 

2757 def _convert_tolerance(self, tolerance, target): 

2758 # override this method on subclasses 

2759 tolerance = np.asarray(tolerance) 

2760 if target.size != tolerance.size and tolerance.size > 1: 

2761 raise ValueError("list-like tolerance size must match target index size") 

2762 return tolerance 

2763 

2764 def _get_fill_indexer(self, target, method, limit=None, tolerance=None): 

2765 if self.is_monotonic_increasing and target.is_monotonic_increasing: 

2766 method = ( 

2767 self._engine.get_pad_indexer 

2768 if method == "pad" 

2769 else self._engine.get_backfill_indexer 

2770 ) 

2771 indexer = method(target._ndarray_values, limit) 

2772 else: 

2773 indexer = self._get_fill_indexer_searchsorted(target, method, limit) 

2774 if tolerance is not None: 

2775 indexer = self._filter_indexer_tolerance( 

2776 target._ndarray_values, indexer, tolerance 

2777 ) 

2778 return indexer 

2779 

    def _get_fill_indexer_searchsorted(self, target, method, limit=None):
        """
        Fallback pad/backfill get_indexer that works for monotonic decreasing
        indexes and non-monotonic targets.

        Returns an integer indexer into self for each target label, with -1
        marking not-found positions. `limit` is rejected on this path.
        """
        if limit is not None:
            raise ValueError(
                f"limit argument for {repr(method)} method only well-defined "
                "if index and target are monotonic"
            )

        side = "left" if method == "pad" else "right"

        # find exact matches first (this simplifies the algorithm)
        indexer = self.get_indexer(target)
        nonexact = indexer == -1
        # Fill the inexact positions in place from searchsorted.
        indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
        if side == "left":
            # searchsorted returns "indices into a sorted array such that,
            # if the corresponding elements in v were inserted before the
            # indices, the order of a would be preserved".
            # Thus, we need to subtract 1 to find values to the left.
            indexer[nonexact] -= 1
            # This also mapped not found values (values of 0 from
            # np.searchsorted) to -1, which conveniently is also our
            # sentinel for missing values
        else:
            # Mark indices to the right of the largest value as not found
            indexer[indexer == len(self)] = -1
        return indexer

2810 

    def _get_nearest_indexer(self, target, limit, tolerance):
        """
        Get the indexer for the nearest index labels; requires an index with
        values that can be subtracted from each other (e.g., not strings or
        tuples).
        """
        # Candidate matches from both directions.
        left_indexer = self.get_indexer(target, "pad", limit=limit)
        right_indexer = self.get_indexer(target, "backfill", limit=limit)

        target_values = target._values
        left_distances = np.abs(self._values[left_indexer] - target_values)
        right_distances = np.abs(self._values[right_indexer] - target_values)

        # Strict `lt` on an increasing index sends ties to the backfill
        # (larger-value) match, per the documented tie-break.
        op = operator.lt if self.is_monotonic_increasing else operator.le
        indexer = np.where(
            # Prefer left when strictly closer, or when right found nothing.
            op(left_distances, right_distances) | (right_indexer == -1),
            left_indexer,
            right_indexer,
        )
        if tolerance is not None:
            indexer = self._filter_indexer_tolerance(target_values, indexer, tolerance)
        return indexer

2833 

2834 def _filter_indexer_tolerance(self, target, indexer, tolerance): 

2835 distance = abs(self._values[indexer] - target) 

2836 indexer = np.where(distance <= tolerance, indexer, -1) 

2837 return indexer 

2838 

2839 # -------------------------------------------------------------------- 

2840 # Indexer Conversion Methods 

2841 

    # Shared docstring template for _convert_scalar_indexer.
    _index_shared_docs[
        "_convert_scalar_indexer"
    ] = """
        Convert a scalar indexer.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """

    @Appender(_index_shared_docs["_convert_scalar_indexer"])
    def _convert_scalar_indexer(self, key, kind=None):
        assert kind in ["ix", "loc", "getitem", "iloc", None]

        if kind == "iloc":
            # iloc keys must validate as positional integers.
            return self._validate_indexer("positional", key, kind)

        if len(self) and not isinstance(self, ABCMultiIndex):

            # we can raise here if we are definitive that this
            # is positional indexing (eg. .ix on with a float)
            # or label indexing if we are using a type able
            # to be represented in the index

            if kind in ["getitem", "ix"] and is_float(key):
                # Float keys only make sense on float-holding indexes.
                if not self.is_floating():
                    return self._invalid_indexer("label", key)

            elif kind in ["loc"] and is_float(key):

                # we want to raise KeyError on string/mixed here
                # technically we *could* raise a TypeError
                # on anything but mixed though
                if self.inferred_type not in [
                    "floating",
                    "mixed-integer-float",
                    "integer-na",
                    "string",
                    "unicode",
                    "mixed",
                ]:
                    self._invalid_indexer("label", key)

            elif kind in ["loc"] and is_integer(key):
                # Integer label lookups require an integer-holding index.
                if not self.holds_integer():
                    self._invalid_indexer("label", key)

        # Key is acceptable as-is.
        return key

2891 

    # Shared docstring template for _convert_slice_indexer.
    _index_shared_docs[
        "_convert_slice_indexer"
    ] = """
        Convert a slice indexer.

        By definition, these are labels unless 'iloc' is passed in.
        Floats are not allowed as the start, step, or stop of the slice.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """

    @Appender(_index_shared_docs["_convert_slice_indexer"])
    def _convert_slice_indexer(self, key: slice, kind=None):
        assert kind in ["ix", "loc", "getitem", "iloc", None]

        # validate iloc
        if kind == "iloc":
            return slice(
                self._validate_indexer("slice", key.start, kind),
                self._validate_indexer("slice", key.stop, kind),
                self._validate_indexer("slice", key.step, kind),
            )

        # potentially cast the bounds to integers
        start, stop, step = key.start, key.stop, key.step

        # figure out if this is a positional indexer
        def is_int(v):
            # None bounds are acceptable under either interpretation.
            return v is None or is_integer(v)

        is_null_slicer = start is None and stop is None
        is_index_slice = is_int(start) and is_int(stop)
        # Integer bounds on a non-integer, non-categorical index read as
        # positional.
        is_positional = is_index_slice and not (
            self.is_integer() or self.is_categorical()
        )

        if kind == "getitem":
            """
            called from the getitem slicers, validate that we are in fact
            integers
            """
            if self.is_integer() or is_index_slice:
                return slice(
                    self._validate_indexer("slice", key.start, kind),
                    self._validate_indexer("slice", key.stop, kind),
                    self._validate_indexer("slice", key.step, kind),
                )

        # convert the slice to an indexer here

        # if we are mixed and have integers
        try:
            if is_positional and self.is_mixed():
                # Validate start & stop
                if start is not None:
                    self.get_loc(start)
                if stop is not None:
                    self.get_loc(stop)
                # Both bounds resolve as labels: treat the slice label-based.
                is_positional = False
        except KeyError:
            if self.inferred_type in ["mixed-integer-float", "integer-na"]:
                raise

        if is_null_slicer:
            # slice(None, None, ...) passes through untouched.
            indexer = key
        elif is_positional:
            indexer = key
        else:
            indexer = self.slice_indexer(start, stop, step, kind=kind)

        return indexer

2966 

2967 def _convert_listlike_indexer(self, keyarr, kind=None): 

2968 """ 

2969 Parameters 

2970 ---------- 

2971 keyarr : list-like 

2972 Indexer to convert. 

2973 

2974 Returns 

2975 ------- 

2976 indexer : numpy.ndarray or None 

2977 Return an ndarray or None if cannot convert. 

2978 keyarr : numpy.ndarray 

2979 Return tuple-safe keys. 

2980 """ 

2981 if isinstance(keyarr, Index): 

2982 keyarr = self._convert_index_indexer(keyarr) 

2983 else: 

2984 keyarr = self._convert_arr_indexer(keyarr) 

2985 

2986 indexer = self._convert_list_indexer(keyarr, kind=kind) 

2987 return indexer, keyarr 

2988 

    # Shared docstring template for _convert_arr_indexer.
    _index_shared_docs[
        "_convert_arr_indexer"
    ] = """
        Convert an array-like indexer to the appropriate dtype.

        Parameters
        ----------
        keyarr : array-like
            Indexer to convert.

        Returns
        -------
        converted_keyarr : array-like
        """

    @Appender(_index_shared_docs["_convert_arr_indexer"])
    def _convert_arr_indexer(self, keyarr):
        # Coerce to ndarray while keeping tuples intact as single elements.
        keyarr = com.asarray_tuplesafe(keyarr)
        return keyarr

3008 

    # Shared docstring template for _convert_index_indexer.
    _index_shared_docs[
        "_convert_index_indexer"
    ] = """
        Convert an Index indexer to the appropriate dtype.

        Parameters
        ----------
        keyarr : Index (or sub-class)
            Indexer to convert.

        Returns
        -------
        converted_keyarr : Index (or sub-class)
        """

    @Appender(_index_shared_docs["_convert_index_indexer"])
    def _convert_index_indexer(self, keyarr):
        # Base class: Index indexers pass through unchanged; subclasses
        # override to coerce dtypes.
        return keyarr

3027 

    # Shared docstring template for _convert_list_indexer.
    _index_shared_docs[
        "_convert_list_indexer"
    ] = """
        Convert a list-like indexer to the appropriate dtype.

        Parameters
        ----------
        keyarr : Index (or sub-class)
            Indexer to convert.
        kind : iloc, ix, loc, optional

        Returns
        -------
        positional indexer or None
        """

    @Appender(_index_shared_docs["_convert_list_indexer"])
    def _convert_list_indexer(self, keyarr, kind=None):
        # Integer-dtype keys on a non-float index may be positional.
        if (
            kind in [None, "iloc", "ix"]
            and is_integer_dtype(keyarr)
            and not self.is_floating()
            and not isinstance(keyarr, ABCPeriodIndex)
        ):

            if self.inferred_type == "mixed-integer":
                # Try label-based resolution first.
                indexer = self.get_indexer(keyarr)
                if (indexer >= 0).all():
                    return indexer
                # missing values are flagged as -1 by get_indexer and negative
                # indices are already converted to positive indices in the
                # above if-statement, so the negative flags are changed to
                # values outside the range of indices so as to trigger an
                # IndexError in maybe_convert_indices
                indexer[indexer < 0] = len(self)

                return maybe_convert_indices(indexer, len(self))

            elif not self.inferred_type == "integer":
                # Positional integers on a non-integer index: wrap negative
                # positions around the end.
                keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr)
                return keyarr

        # No conversion; caller falls back to label-based handling.
        return None

3071 

3072 def _invalid_indexer(self, form, key): 

3073 """ 

3074 Consistent invalid indexer message. 

3075 """ 

3076 raise TypeError( 

3077 f"cannot do {form} indexing on {type(self)} with these " 

3078 f"indexers [{key}] of {type(key)}" 

3079 ) 

3080 

3081 # -------------------------------------------------------------------- 

3082 # Reindex Methods 

3083 

3084 def _can_reindex(self, indexer): 

3085 """ 

3086 Check if we are allowing reindexing with this particular indexer. 

3087 

3088 Parameters 

3089 ---------- 

3090 indexer : an integer indexer 

3091 

3092 Raises 

3093 ------ 

3094 ValueError if its a duplicate axis 

3095 """ 

3096 

3097 # trying to reindex on an axis with duplicates 

3098 if not self.is_unique and len(indexer): 

3099 raise ValueError("cannot reindex from a duplicate axis") 

3100 

    def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
        """
        Create index with target's values (move/add/delete values
        as necessary).

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray or None
            Indices of output values in original index (None when target
            equals self or when joining on a level returned none).
        """
        # GH6552: preserve names when reindexing to non-named target
        # (i.e. neither Index nor Series).
        preserve_names = not hasattr(target, "name")

        # GH7774: preserve dtype/tz if target is empty and not an Index.
        target = _ensure_has_len(target)  # target may be an iterator

        if not isinstance(target, Index) and len(target) == 0:
            attrs = self._get_attributes_dict()
            attrs.pop("freq", None)  # don't preserve freq
            values = self._data[:0]  # appropriately-dtyped empty array
            target = self._simple_new(values, dtype=self.dtype, **attrs)
        else:
            target = ensure_index(target)

        if level is not None:
            # reindex against one level of a MultiIndex via a level join
            if method is not None:
                raise TypeError("Fill method not supported if level passed")
            _, indexer, _ = self._join_level(
                target, level, how="right", return_indexers=True
            )
        else:
            if self.equals(target):
                indexer = None
            else:
                # check is_overlapping for IntervalIndex compat
                if self.is_unique and not getattr(self, "is_overlapping", False):
                    indexer = self.get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                else:
                    # non-unique path; fill methods are ambiguous here
                    if method is not None or limit is not None:
                        raise ValueError(
                            "cannot reindex a non-unique index "
                            "with a method or limit"
                        )
                    indexer, missing = self.get_indexer_non_unique(target)

        if preserve_names and target.nlevels == 1 and target.name != self.name:
            target = target.copy()
            target.name = self.name

        return target, indexer

3160 

    def _reindex_non_unique(self, target):
        """
        Create a new index with target's values (move/add/delete values as
        necessary) use with non-unique Index and a possibly non-unique target.

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray
            Indices of output values in original index.
        new_indexer : np.ndarray or None
            Indexer into the new index accounting for missing labels, or
            None when every target label was found.
        """

        target = ensure_index(target)
        indexer, missing = self.get_indexer_non_unique(target)
        # -1 marks target labels not present in self
        check = indexer != -1
        new_labels = self.take(indexer[check])
        new_indexer = None

        if len(missing):
            length = np.arange(len(indexer))

            missing = ensure_platform_int(missing)
            missing_labels = target.take(missing)
            missing_indexer = ensure_int64(length[~check])
            cur_labels = self.take(indexer[check]).values
            cur_indexer = ensure_int64(length[check])

            # interleave found and missing labels at their target positions
            new_labels = np.empty(tuple([len(indexer)]), dtype=object)
            new_labels[cur_indexer] = cur_labels
            new_labels[missing_indexer] = missing_labels

            # a unique indexer
            if target.is_unique:

                # see GH5553, make sure we use the right indexer
                new_indexer = np.arange(len(indexer))
                new_indexer[cur_indexer] = np.arange(len(cur_labels))
                new_indexer[missing_indexer] = -1

            # we have a non_unique selector, need to use the original
            # indexer here
            else:

                # need to retake to have the same size as the indexer
                indexer[~check] = -1

                # reset the new indexer to account for the new size
                new_indexer = np.arange(len(self.take(indexer)))
                new_indexer[~check] = -1

        new_index = self._shallow_copy_with_infer(new_labels)
        return new_index, indexer, new_indexer

3219 

3220 # -------------------------------------------------------------------- 

3221 # Join Methods 

3222 

    _index_shared_docs[
        "join"
    ] = """
        Compute join_index and indexers to conform data
        structures to the new index.

        Parameters
        ----------
        other : Index
        how : {'left', 'right', 'inner', 'outer'}
        level : int or level name, default None
        return_indexers : bool, default False
        sort : bool, default False
            Sort the join keys lexicographically in the result Index. If False,
            the order of the join keys depends on the join type (how keyword).

        Returns
        -------
        join_index, (left_indexer, right_indexer)
        """

    @Appender(_index_shared_docs["join"])
    def join(self, other, how="left", level=None, return_indexers=False, sort=False):
        self_is_mi = isinstance(self, ABCMultiIndex)
        other_is_mi = isinstance(other, ABCMultiIndex)

        # try to figure out the join level
        # GH3662
        if level is None and (self_is_mi or other_is_mi):

            # have the same levels/names so a simple join
            if self.names == other.names:
                pass
            else:
                return self._join_multi(other, how=how, return_indexers=return_indexers)

        # join on the level
        if level is not None and (self_is_mi or other_is_mi):
            return self._join_level(
                other, level, how=how, return_indexers=return_indexers
            )

        other = ensure_index(other)

        # fast paths for an empty operand
        if len(other) == 0 and how in ("left", "outer"):
            join_index = self._shallow_copy()
            if return_indexers:
                rindexer = np.repeat(-1, len(join_index))
                return join_index, None, rindexer
            else:
                return join_index

        if len(self) == 0 and how in ("right", "outer"):
            join_index = other._shallow_copy()
            if return_indexers:
                lindexer = np.repeat(-1, len(join_index))
                return join_index, lindexer, None
            else:
                return join_index

        if self._join_precedence < other._join_precedence:
            # let the higher-precedence index drive the join, flipping the
            # direction and swapping the returned indexers back afterwards
            how = {"right": "left", "left": "right"}.get(how, how)
            result = other.join(
                self, how=how, level=level, return_indexers=return_indexers
            )
            if return_indexers:
                x, y, z = result
                result = x, z, y
            return result

        if not is_dtype_equal(self.dtype, other.dtype):
            # mismatched dtypes join via object dtype
            this = self.astype("O")
            other = other.astype("O")
            return this.join(other, how=how, return_indexers=return_indexers)

        _validate_join_method(how)

        # dispatch on uniqueness/monotonicity for the specialized joins
        if not self.is_unique and not other.is_unique:
            return self._join_non_unique(
                other, how=how, return_indexers=return_indexers
            )
        elif not self.is_unique or not other.is_unique:
            if self.is_monotonic and other.is_monotonic:
                return self._join_monotonic(
                    other, how=how, return_indexers=return_indexers
                )
            else:
                return self._join_non_unique(
                    other, how=how, return_indexers=return_indexers
                )
        elif self.is_monotonic and other.is_monotonic:
            try:
                return self._join_monotonic(
                    other, how=how, return_indexers=return_indexers
                )
            except TypeError:
                # fall through to the generic set-operation based join
                pass

        if how == "left":
            join_index = self
        elif how == "right":
            join_index = other
        elif how == "inner":
            # TODO: sort=False here for backwards compat. It may
            # be better to use the sort parameter passed into join
            join_index = self.intersection(other, sort=False)
        elif how == "outer":
            # TODO: sort=True here for backwards compat. It may
            # be better to use the sort parameter passed into join
            join_index = self.union(other)

        if sort:
            join_index = join_index.sort_values()

        if return_indexers:
            if join_index is self:
                lindexer = None
            else:
                lindexer = self.get_indexer(join_index)
            if join_index is other:
                rindexer = None
            else:
                rindexer = other.get_indexer(join_index)
            return join_index, lindexer, rindexer
        else:
            return join_index

3349 

    def _join_multi(self, other, how, return_indexers=True):
        """
        Join when at least one side is a MultiIndex and the level names
        differ; joins on the overlapping level names.
        """
        from pandas.core.indexes.multi import MultiIndex
        from pandas.core.reshape.merge import _restore_dropped_levels_multijoin

        # figure out join names
        self_names = set(com.not_none(*self.names))
        other_names = set(com.not_none(*other.names))
        overlap = self_names & other_names

        # need at least 1 in common
        if not overlap:
            raise ValueError("cannot join with no overlapping index names")

        self_is_mi = isinstance(self, MultiIndex)
        other_is_mi = isinstance(other, MultiIndex)

        if self_is_mi and other_is_mi:

            # Drop the non-matching levels from left and right respectively
            ldrop_names = list(self_names - overlap)
            rdrop_names = list(other_names - overlap)

            # if only the order differs
            if not len(ldrop_names + rdrop_names):
                self_jnlevels = self
                other_jnlevels = other.reorder_levels(self.names)
            else:
                self_jnlevels = self.droplevel(ldrop_names)
                other_jnlevels = other.droplevel(rdrop_names)

            # Join left and right
            # Join on same leveled multi-index frames is supported
            join_idx, lidx, ridx = self_jnlevels.join(
                other_jnlevels, how, return_indexers=True
            )

            # Restore the dropped levels
            # Returned index level order is
            # common levels, ldrop_names, rdrop_names
            dropped_names = ldrop_names + rdrop_names

            levels, codes, names = _restore_dropped_levels_multijoin(
                self, other, dropped_names, join_idx, lidx, ridx
            )

            # Re-create the multi-index
            multi_join_idx = MultiIndex(
                levels=levels, codes=codes, names=names, verify_integrity=False
            )

            multi_join_idx = multi_join_idx.remove_unused_levels()

            return multi_join_idx, lidx, ridx

        jl = list(overlap)[0]

        # Case where only one index is multi
        # make the indices into mi's that match
        flip_order = False
        if self_is_mi:
            self, other = other, self
            flip_order = True
            # flip if join method is right or left
            how = {"right": "left", "left": "right"}.get(how, how)

        level = other.names.index(jl)
        result = self._join_level(
            other, level, how=how, return_indexers=return_indexers
        )

        if flip_order:
            # swap the indexers back to match the caller's orientation
            if isinstance(result, tuple):
                return result[0], result[2], result[1]
        return result

3424 

    def _join_non_unique(self, other, how="left", return_indexers=False):
        """
        Join two indexes of which at least one has duplicate values,
        delegating indexer computation to the merge machinery.
        """
        from pandas.core.reshape.merge import _get_join_indexers

        left_idx, right_idx = _get_join_indexers(
            [self._ndarray_values], [other._ndarray_values], how=how, sort=True
        )

        left_idx = ensure_platform_int(left_idx)
        right_idx = ensure_platform_int(right_idx)

        # start from left values; -1 in left_idx marks positions that must
        # be filled from the right side instead
        join_index = np.asarray(self._ndarray_values.take(left_idx))
        mask = left_idx == -1
        np.putmask(join_index, mask, other._ndarray_values.take(right_idx))

        join_index = self._wrap_joined_index(join_index, other)

        if return_indexers:
            return join_index, left_idx, right_idx
        else:
            return join_index

3445 

    def _join_level(
        self, other, level, how="left", return_indexers=False, keep_order=True
    ):
        """
        The join method *only* affects the level of the resulting
        MultiIndex. Otherwise it just exactly aligns the Index data to the
        labels of the level in the MultiIndex.

        If ```keep_order == True```, the order of the data indexed by the
        MultiIndex will not be changed; otherwise, it will tie out
        with `other`.
        """
        from pandas.core.indexes.multi import MultiIndex

        def _get_leaf_sorter(labels):
            """
            Returns sorter for the inner most level while preserving the
            order of higher levels.
            """
            if labels[0].size == 0:
                return np.empty(0, dtype="int64")

            if len(labels) == 1:
                lab = ensure_int64(labels[0])
                sorter, _ = libalgos.groupsort_indexer(lab, 1 + lab.max())
                return sorter

            # find indexers of beginning of each set of
            # same-key labels w.r.t all but last level
            tic = labels[0][:-1] != labels[0][1:]
            for lab in labels[1:-1]:
                tic |= lab[:-1] != lab[1:]

            starts = np.hstack(([True], tic, [True])).nonzero()[0]
            lab = ensure_int64(labels[-1])
            return lib.get_level_sorter(lab, ensure_int64(starts))

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            raise TypeError("Join on level between two MultiIndex objects is ambiguous")

        left, right = self, other

        # normalize so that 'left' is always the MultiIndex; remember to
        # swap the returned indexers back at the end
        flip_order = not isinstance(self, MultiIndex)
        if flip_order:
            left, right = right, left
            how = {"right": "left", "left": "right"}.get(how, how)

        level = left._get_level_number(level)
        old_level = left.levels[level]

        if not right.is_unique:
            raise NotImplementedError(
                "Index._join_level on non-unique index is not implemented"
            )

        new_level, left_lev_indexer, right_lev_indexer = old_level.join(
            right, how=how, return_indexers=True
        )

        if left_lev_indexer is None:
            # level values unchanged by the join
            if keep_order or len(left) == 0:
                left_indexer = None
                join_index = left
            else:  # sort the leaves
                left_indexer = _get_leaf_sorter(left.codes[: level + 1])
                join_index = left[left_indexer]

        else:
            left_lev_indexer = ensure_int64(left_lev_indexer)
            rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))

            new_lev_codes = algos.take_nd(
                rev_indexer, left.codes[level], allow_fill=False
            )

            new_codes = list(left.codes)
            new_codes[level] = new_lev_codes

            new_levels = list(left.levels)
            new_levels[level] = new_level

            if keep_order:  # just drop missing values. o.w. keep order
                left_indexer = np.arange(len(left), dtype=np.intp)
                mask = new_lev_codes != -1
                if not mask.all():
                    new_codes = [lab[mask] for lab in new_codes]
                    left_indexer = left_indexer[mask]

            else:  # tie out the order with other
                if level == 0:  # outer most level, take the fast route
                    ngroups = 1 + new_lev_codes.max()
                    left_indexer, counts = libalgos.groupsort_indexer(
                        new_lev_codes, ngroups
                    )

                    # missing values are placed first; drop them!
                    left_indexer = left_indexer[counts[0] :]
                    new_codes = [lab[left_indexer] for lab in new_codes]

                else:  # sort the leaves
                    mask = new_lev_codes != -1
                    mask_all = mask.all()
                    if not mask_all:
                        new_codes = [lab[mask] for lab in new_codes]

                    left_indexer = _get_leaf_sorter(new_codes[: level + 1])
                    new_codes = [lab[left_indexer] for lab in new_codes]

                    # left_indexers are w.r.t masked frame.
                    # reverse to original frame!
                    if not mask_all:
                        left_indexer = mask.nonzero()[0][left_indexer]

            join_index = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=left.names,
                verify_integrity=False,
            )

        if right_lev_indexer is not None:
            right_indexer = algos.take_nd(
                right_lev_indexer, join_index.codes[level], allow_fill=False
            )
        else:
            right_indexer = join_index.codes[level]

        if flip_order:
            left_indexer, right_indexer = right_indexer, left_indexer

        if return_indexers:
            left_indexer = (
                None if left_indexer is None else ensure_platform_int(left_indexer)
            )
            right_indexer = (
                None if right_indexer is None else ensure_platform_int(right_indexer)
            )
            return join_index, left_indexer, right_indexer
        else:
            return join_index

3586 

    def _join_monotonic(self, other, how="left", return_indexers=False):
        """
        Join two indexes; callers (``join``) only dispatch here when both
        sides are monotonic.
        """
        if self.equals(other):
            ret_index = other if how == "right" else self
            if return_indexers:
                return ret_index, None, None
            else:
                return ret_index

        sv = self._ndarray_values
        ov = other._ndarray_values

        if self.is_unique and other.is_unique:
            # We can perform much better than the general case
            if how == "left":
                join_index = self
                lidx = None
                ridx = self._left_indexer_unique(sv, ov)
            elif how == "right":
                join_index = other
                lidx = self._left_indexer_unique(ov, sv)
                ridx = None
            elif how == "inner":
                join_index, lidx, ridx = self._inner_indexer(sv, ov)
                join_index = self._wrap_joined_index(join_index, other)
            elif how == "outer":
                join_index, lidx, ridx = self._outer_indexer(sv, ov)
                join_index = self._wrap_joined_index(join_index, other)
        else:
            # general (possibly duplicated) monotonic case
            if how == "left":
                join_index, lidx, ridx = self._left_indexer(sv, ov)
            elif how == "right":
                # a right join is a left join with the operands swapped
                join_index, ridx, lidx = self._left_indexer(ov, sv)
            elif how == "inner":
                join_index, lidx, ridx = self._inner_indexer(sv, ov)
            elif how == "outer":
                join_index, lidx, ridx = self._outer_indexer(sv, ov)
            join_index = self._wrap_joined_index(join_index, other)

        if return_indexers:
            lidx = None if lidx is None else ensure_platform_int(lidx)
            ridx = None if ridx is None else ensure_platform_int(ridx)
            return join_index, lidx, ridx
        else:
            return join_index

3631 

3632 def _wrap_joined_index(self, joined, other): 

3633 name = get_op_result_name(self, other) 

3634 return Index(joined, name=name) 

3635 

3636 # -------------------------------------------------------------------- 

3637 # Uncategorized Methods 

3638 

    @property
    def values(self):
        """
        Return an array representing the data in the Index.

        .. warning::

            We recommend using :attr:`Index.array` or
            :meth:`Index.to_numpy`, depending on whether you need
            a reference to the underlying data or a NumPy array.

        Returns
        -------
        array: numpy.ndarray or ExtensionArray

        See Also
        --------
        Index.array : Reference to the underlying data.
        Index.to_numpy : A NumPy array representing the underlying data.
        """
        # ndarray view of the underlying data (no copy)
        return self._data.view(np.ndarray)

3660 

3661 @cache_readonly 

3662 @Appender(IndexOpsMixin.array.__doc__) # type: ignore 

3663 def array(self) -> ExtensionArray: 

3664 array = self._data 

3665 if isinstance(array, np.ndarray): 

3666 from pandas.core.arrays.numpy_ import PandasArray 

3667 

3668 array = PandasArray(array) 

3669 return array 

3670 

    @property
    def _values(self) -> Union[ExtensionArray, ABCIndexClass, np.ndarray]:
        # TODO(EA): remove index types as they become extension arrays
        """
        The best array representation.

        This is an ndarray, ExtensionArray, or Index subclass. This differs
        from ``_ndarray_values``, which always returns an ndarray.

        Both ``_values`` and ``_ndarray_values`` are consistent between
        ``Series`` and ``Index``.

        It may differ from the public '.values' method.

        index             | values          | _values       | _ndarray_values |
        ----------------- | --------------- | ------------- | --------------- |
        Index             | ndarray         | ndarray       | ndarray         |
        CategoricalIndex  | Categorical     | Categorical   | ndarray[int]    |
        DatetimeIndex     | ndarray[M8ns]   | ndarray[M8ns] | ndarray[M8ns]   |
        DatetimeIndex[tz] | ndarray[M8ns]   | DTI[tz]       | ndarray[M8ns]   |
        PeriodIndex       | ndarray[object] | PeriodArray   | ndarray[int]    |
        IntervalIndex     | IntervalArray   | IntervalArray | ndarray[object] |

        See Also
        --------
        values
        _ndarray_values
        """
        # base Index stores its data directly; subclasses with EA backing
        # inherit this and return their ExtensionArray
        return self._data

3700 

    def _internal_get_values(self):
        """
        Return `Index` data as an `numpy.ndarray`.

        Returns
        -------
        numpy.ndarray
            A one-dimensional numpy array of the `Index` values.

        See Also
        --------
        Index.values : The attribute that _internal_get_values wraps.

        Examples
        --------
        Getting the `Index` values of a `DataFrame`:

        >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        ...                   index=['a', 'b', 'c'], columns=['A', 'B', 'C'])
        >>> df
           A  B  C
        a  1  2  3
        b  4  5  6
        c  7  8  9
        >>> df.index._internal_get_values()
        array(['a', 'b', 'c'], dtype=object)

        Standalone `Index` values:

        >>> idx = pd.Index(['1', '2', '3'])
        >>> idx._internal_get_values()
        array(['1', '2', '3'], dtype=object)

        `MultiIndex` arrays also have only one dimension:

        >>> midx = pd.MultiIndex.from_arrays([[1, 2, 3], ['a', 'b', 'c']],
        ...                                  names=('number', 'letter'))
        >>> midx._internal_get_values()
        array([(1, 'a'), (2, 'b'), (3, 'c')], dtype=object)
        >>> midx._internal_get_values().ndim
        1
        """
        # thin wrapper: delegates entirely to the .values property
        return self.values

3744 

3745 @Appender(IndexOpsMixin.memory_usage.__doc__) 

3746 def memory_usage(self, deep=False): 

3747 result = super().memory_usage(deep=deep) 

3748 

3749 # include our engine hashtable 

3750 result += self._engine.sizeof(deep=deep) 

3751 return result 

3752 

    _index_shared_docs[
        "where"
    ] = """
        Return an Index of same shape as self and whose corresponding
        entries are from self where cond is True and otherwise are from
        other.

        Parameters
        ----------
        cond : bool array-like with the same length as self
        other : scalar, or array-like

        Returns
        -------
        Index
        """

    @Appender(_index_shared_docs["where"])
    def where(self, cond, other=None):
        if other is None:
            # fill with this index's NA value when no replacement is given
            other = self._na_value

        dtype = self.dtype
        values = self.values

        if is_bool(other) or is_bool_dtype(other):

            # bools force casting
            values = values.astype(object)
            dtype = None

        values = np.where(cond, values, other)

        if self._is_numeric_dtype and np.any(isna(values)):
            # We can't coerce to the numeric dtype of "self" (unless
            # it's float) if there are NaN values in our output.
            dtype = None

        return self._shallow_copy_with_infer(values, dtype=dtype)

3792 

3793 # construction helpers 

3794 @classmethod 

3795 def _scalar_data_error(cls, data): 

3796 # We return the TypeError so that we can raise it from the constructor 

3797 # in order to keep mypy happy 

3798 return TypeError( 

3799 f"{cls.__name__}(...) must be called with a collection of some " 

3800 f"kind, {repr(data)} was passed" 

3801 ) 

3802 

3803 @classmethod 

3804 def _string_data_error(cls, data): 

3805 raise TypeError( 

3806 "String dtype not supported, you may need " 

3807 "to explicitly cast to a numeric type" 

3808 ) 

3809 

    def _coerce_scalar_to_index(self, item):
        """
        We need to coerce a scalar to a compat for our index type.

        Parameters
        ----------
        item : scalar item to coerce

        Returns
        -------
        Index
            A length-1 Index holding ``item``.
        """
        dtype = self.dtype

        if self._is_numeric_dtype and isna(item):
            # We can't coerce to the numeric dtype of "self" (unless
            # it's float) if there are NaN values in our output.
            dtype = None

        return Index([item], dtype=dtype, **self._get_attributes_dict())

3826 

3827 def _to_safe_for_reshape(self): 

3828 """ 

3829 Convert to object if we are a categorical. 

3830 """ 

3831 return self 

3832 

3833 def _convert_for_op(self, value): 

3834 """ 

3835 Convert value to be insertable to ndarray. 

3836 """ 

3837 return value 

3838 

3839 def _assert_can_do_op(self, value): 

3840 """ 

3841 Check value is valid for scalar op. 

3842 """ 

3843 if not is_scalar(value): 

3844 raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}") 

3845 

    @property
    def _has_complex_internals(self):
        """
        Indicates if an index is not directly backed by a numpy array.

        The base Index is ndarray-backed, hence False; subclasses override.
        """
        # used to avoid libreduction code paths, which raise or require conversion
        return False

3853 

3854 def _is_memory_usage_qualified(self) -> bool: 

3855 """ 

3856 Return a boolean if we need a qualified .info display. 

3857 """ 

3858 return self.is_object() 

3859 

3860 def is_type_compatible(self, kind) -> bool: 

3861 """ 

3862 Whether the index type is compatible with the provided type. 

3863 """ 

3864 return kind == self.inferred_type 

3865 

    _index_shared_docs[
        "contains"
    ] = """
        Return a boolean indicating whether the provided key is in the index.

        Parameters
        ----------
        key : label
            The key to check if it is present in the index.

        Returns
        -------
        bool
            Whether the key search is in the index.

        See Also
        --------
        Index.isin : Returns an ndarray of boolean dtype indicating whether the
            list-like key is in the index.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx
        Int64Index([1, 2, 3, 4], dtype='int64')

        >>> 2 in idx
        True
        >>> 6 in idx
        False
        """

    @Appender(_index_shared_docs["contains"] % _index_doc_kwargs)
    def __contains__(self, key) -> bool:
        # unhashable keys raise TypeError up-front (outside the except below)
        hash(key)
        try:
            return key in self._engine
        except (OverflowError, TypeError, ValueError):
            # keys the engine cannot represent are simply not contained
            return False

3905 

    def __hash__(self):
        # Index objects are explicitly unhashable containers.
        raise TypeError(f"unhashable type: {repr(type(self).__name__)}")

3908 

    def __setitem__(self, key, value):
        # Indexes are immutable; reject all item assignment.
        raise TypeError("Index does not support mutable operations")

3911 

    def __getitem__(self, key):
        """
        Override numpy.ndarray's __getitem__ method to work as desired.

        This function adds lists and Series as valid boolean indexers
        (ndarrays only supports ndarray with dtype=bool).

        If resulting ndim != 1, plain ndarray is returned instead of
        corresponding `Index` subclass.

        """
        # There's no custom logic to be implemented in __getslice__, so it's
        # not overloaded intentionally.
        getitem = self._data.__getitem__
        promote = self._shallow_copy

        if is_scalar(key):
            key = com.cast_scalar_indexer(key)
            return getitem(key)

        if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # pessimization of basic indexing.
            return promote(getitem(key))

        if com.is_bool_indexer(key):
            key = np.asarray(key, dtype=bool)

        key = com.values_from_object(key)
        result = getitem(key)
        if not is_scalar(result):
            if np.ndim(result) > 1:
                # >1-dim results are deprecated; return the plain ndarray
                deprecate_ndim_indexing(result)
                return result
            return promote(result)
        else:
            return result

3949 

3950 def _can_hold_identifiers_and_holds_name(self, name) -> bool: 

3951 """ 

3952 Faster check for ``name in self`` when we know `name` is a Python 

3953 identifier (e.g. in NDFrame.__getattr__, which hits this to support 

3954 . key lookup). For indexes that can't hold identifiers (everything 

3955 but object & categorical) we just return False. 

3956 

3957 https://github.com/pandas-dev/pandas/issues/19764 

3958 """ 

3959 if self.is_object() or self.is_categorical(): 

3960 return name in self 

3961 return False 

3962 

3963 def append(self, other): 

3964 """ 

3965 Append a collection of Index options together. 

3966 

3967 Parameters 

3968 ---------- 

3969 other : Index or list/tuple of indices 

3970 

3971 Returns 

3972 ------- 

3973 appended : Index 

3974 """ 

3975 

3976 to_concat = [self] 

3977 

3978 if isinstance(other, (list, tuple)): 

3979 to_concat = to_concat + list(other) 

3980 else: 

3981 to_concat.append(other) 

3982 

3983 for obj in to_concat: 

3984 if not isinstance(obj, Index): 

3985 raise TypeError("all inputs must be Index") 

3986 

3987 names = {obj.name for obj in to_concat} 

3988 name = None if len(names) > 1 else self.name 

3989 

3990 return self._concat(to_concat, name) 

3991 

    def _concat(self, to_concat, name):
        """
        Concatenate indexes, dispatching on whether they share a dtype kind.
        """
        typs = _concat.get_dtype_kinds(to_concat)

        if len(typs) == 1:
            # homogeneous: subclass may have a specialized implementation
            return self._concat_same_dtype(to_concat, name=name)
        # mixed kinds: fall back to the base-class (object-coercing) path
        return Index._concat_same_dtype(self, to_concat, name=name)

3999 

    def _concat_same_dtype(self, to_concat, name):
        """
        Concatenate to_concat which has the same class.
        """
        # must be overridden in specific classes
        klasses = (
            ABCDatetimeIndex,
            ABCTimedeltaIndex,
            ABCPeriodIndex,
            ExtensionArray,
            ABCIntervalIndex,
        )
        # these types need explicit object coercion before np.concatenate
        to_concat = [
            x.astype(object) if isinstance(x, klasses) else x for x in to_concat
        ]

        self = to_concat[0]
        attribs = self._get_attributes_dict()
        attribs["name"] = name

        # unwrap Index objects down to their array values
        to_concat = [x._values if isinstance(x, Index) else x for x in to_concat]

        return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs)

4023 

    def putmask(self, mask, value):
        """
        Return a new Index of the values set with the mask.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.putmask
        """
        # operate on a copy; Index itself is immutable
        values = self.values.copy()
        try:
            np.putmask(values, mask, self._convert_for_op(value))
            return self._shallow_copy(values)
        except (ValueError, TypeError) as err:
            if is_object_dtype(self):
                # already object dtype: nothing further to coerce to
                raise err

            # coerces to object
            return self.astype(object).putmask(mask, value)

4046 

    def equals(self, other) -> bool:
        """
        Determine if two Index objects contain the same elements.

        Returns
        -------
        bool
            True if "other" is an Index and it has the same elements as calling
            index; False otherwise.
        """
        # Identical object (or registered view) is trivially equal.
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False

        if is_object_dtype(self) and not is_object_dtype(other):
            # if other is not object, use other's logic for coercion
            return other.equals(self)

        if isinstance(other, ABCMultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            if not is_object_dtype(self.dtype):
                if self.nlevels != other.nlevels:
                    return False

        # Element-wise comparison treating NaNs in matching positions as equal.
        return array_equivalent(
            com.values_from_object(self), com.values_from_object(other)
        )

4076 

4077 def identical(self, other) -> bool: 

4078 """ 

4079 Similar to equals, but check that other comparable attributes are 

4080 also equal. 

4081 

4082 Returns 

4083 ------- 

4084 bool 

4085 If two Index objects have equal elements and same type True, 

4086 otherwise False. 

4087 """ 

4088 return ( 

4089 self.equals(other) 

4090 and all( 

4091 ( 

4092 getattr(self, c, None) == getattr(other, c, None) 

4093 for c in self._comparables 

4094 ) 

4095 ) 

4096 and type(self) == type(other) 

4097 ) 

4098 

4099 def asof(self, label): 

4100 """ 

4101 Return the label from the index, or, if not present, the previous one. 

4102 

4103 Assuming that the index is sorted, return the passed index label if it 

4104 is in the index, or return the previous index label if the passed one 

4105 is not in the index. 

4106 

4107 Parameters 

4108 ---------- 

4109 label : object 

4110 The label up to which the method returns the latest index label. 

4111 

4112 Returns 

4113 ------- 

4114 object 

4115 The passed label if it is in the index. The previous label if the 

4116 passed label is not in the sorted index or `NaN` if there is no 

4117 such label. 

4118 

4119 See Also 

4120 -------- 

4121 Series.asof : Return the latest value in a Series up to the 

4122 passed index. 

4123 merge_asof : Perform an asof merge (similar to left join but it 

4124 matches on nearest key rather than equal key). 

4125 Index.get_loc : An `asof` is a thin wrapper around `get_loc` 

4126 with method='pad'. 

4127 

4128 Examples 

4129 -------- 

4130 `Index.asof` returns the latest index label up to the passed label. 

4131 

4132 >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03']) 

4133 >>> idx.asof('2014-01-01') 

4134 '2013-12-31' 

4135 

4136 If the label is in the index, the method returns the passed label. 

4137 

4138 >>> idx.asof('2014-01-02') 

4139 '2014-01-02' 

4140 

4141 If all of the labels in the index are later than the passed label, 

4142 NaN is returned. 

4143 

4144 >>> idx.asof('1999-01-02') 

4145 nan 

4146 

4147 If the index is not sorted, an error is raised. 

4148 

4149 >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02', 

4150 ... '2014-01-03']) 

4151 >>> idx_not_sorted.asof('2013-12-31') 

4152 Traceback (most recent call last): 

4153 ValueError: index must be monotonic increasing or decreasing 

4154 """ 

4155 try: 

4156 loc = self.get_loc(label, method="pad") 

4157 except KeyError: 

4158 return self._na_value 

4159 else: 

4160 if isinstance(loc, slice): 

4161 loc = loc.indices(len(self))[-1] 

4162 return self[loc] 

4163 

4164 def asof_locs(self, where, mask): 

4165 """ 

4166 Find the locations (indices) of the labels from the index for 

4167 every entry in the `where` argument. 

4168 

4169 As in the `asof` function, if the label (a particular entry in 

4170 `where`) is not in the index, the latest index label up to the 

4171 passed label is chosen and its index returned. 

4172 

4173 If all of the labels in the index are later than a label in `where`, 

4174 -1 is returned. 

4175 

4176 `mask` is used to ignore NA values in the index during calculation. 

4177 

4178 Parameters 

4179 ---------- 

4180 where : Index 

4181 An Index consisting of an array of timestamps. 

4182 mask : array-like 

4183 Array of booleans denoting where values in the original 

4184 data are not NA. 

4185 

4186 Returns 

4187 ------- 

4188 numpy.ndarray 

4189 An array of locations (indices) of the labels from the Index 

4190 which correspond to the return values of the `asof` function 

4191 for every element in `where`. 

4192 """ 

4193 locs = self.values[mask].searchsorted(where.values, side="right") 

4194 locs = np.where(locs > 0, locs - 1, 0) 

4195 

4196 result = np.arange(len(self))[mask].take(locs) 

4197 

4198 first = mask.argmax() 

4199 result[(locs == 0) & (where.values < self.values[first])] = -1 

4200 

4201 return result 

4202 

4203 def sort_values(self, return_indexer=False, ascending=True): 

4204 """ 

4205 Return a sorted copy of the index. 

4206 

4207 Return a sorted copy of the index, and optionally return the indices 

4208 that sorted the index itself. 

4209 

4210 Parameters 

4211 ---------- 

4212 return_indexer : bool, default False 

4213 Should the indices that would sort the index be returned. 

4214 ascending : bool, default True 

4215 Should the index values be sorted in an ascending order. 

4216 

4217 Returns 

4218 ------- 

4219 sorted_index : pandas.Index 

4220 Sorted copy of the index. 

4221 indexer : numpy.ndarray, optional 

4222 The indices that the index itself was sorted by. 

4223 

4224 See Also 

4225 -------- 

4226 Series.sort_values : Sort values of a Series. 

4227 DataFrame.sort_values : Sort values in a DataFrame. 

4228 

4229 Examples 

4230 -------- 

4231 >>> idx = pd.Index([10, 100, 1, 1000]) 

4232 >>> idx 

4233 Int64Index([10, 100, 1, 1000], dtype='int64') 

4234 

4235 Sort values in ascending order (default behavior). 

4236 

4237 >>> idx.sort_values() 

4238 Int64Index([1, 10, 100, 1000], dtype='int64') 

4239 

4240 Sort values in descending order, and also get the indices `idx` was 

4241 sorted by. 

4242 

4243 >>> idx.sort_values(ascending=False, return_indexer=True) 

4244 (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2])) 

4245 """ 

4246 _as = self.argsort() 

4247 if not ascending: 

4248 _as = _as[::-1] 

4249 

4250 sorted_index = self.take(_as) 

4251 

4252 if return_indexer: 

4253 return sorted_index, _as 

4254 else: 

4255 return sorted_index 

4256 

4257 def sort(self, *args, **kwargs): 

4258 """ 

4259 Use sort_values instead. 

4260 """ 

4261 raise TypeError("cannot sort an Index object in-place, use sort_values instead") 

4262 

4263 def shift(self, periods=1, freq=None): 

4264 """ 

4265 Shift index by desired number of time frequency increments. 

4266 

4267 This method is for shifting the values of datetime-like indexes 

4268 by a specified time increment a given number of times. 

4269 

4270 Parameters 

4271 ---------- 

4272 periods : int, default 1 

4273 Number of periods (or increments) to shift by, 

4274 can be positive or negative. 

4275 freq : pandas.DateOffset, pandas.Timedelta or str, optional 

4276 Frequency increment to shift by. 

4277 If None, the index is shifted by its own `freq` attribute. 

4278 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. 

4279 

4280 Returns 

4281 ------- 

4282 pandas.Index 

4283 Shifted index. 

4284 

4285 See Also 

4286 -------- 

4287 Series.shift : Shift values of Series. 

4288 

4289 Notes 

4290 ----- 

4291 This method is only implemented for datetime-like index classes, 

4292 i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex. 

4293 

4294 Examples 

4295 -------- 

4296 Put the first 5 month starts of 2011 into an index. 

4297 

4298 >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS') 

4299 >>> month_starts 

4300 DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01', 

4301 '2011-05-01'], 

4302 dtype='datetime64[ns]', freq='MS') 

4303 

4304 Shift the index by 10 days. 

4305 

4306 >>> month_starts.shift(10, freq='D') 

4307 DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11', 

4308 '2011-05-11'], 

4309 dtype='datetime64[ns]', freq=None) 

4310 

4311 The default value of `freq` is the `freq` attribute of the index, 

4312 which is 'MS' (month start) in this example. 

4313 

4314 >>> month_starts.shift(10) 

4315 DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01', 

4316 '2012-03-01'], 

4317 dtype='datetime64[ns]', freq='MS') 

4318 """ 

4319 raise NotImplementedError(f"Not supported for type {type(self).__name__}") 

4320 

4321 def argsort(self, *args, **kwargs): 

4322 """ 

4323 Return the integer indices that would sort the index. 

4324 

4325 Parameters 

4326 ---------- 

4327 *args 

4328 Passed to `numpy.ndarray.argsort`. 

4329 **kwargs 

4330 Passed to `numpy.ndarray.argsort`. 

4331 

4332 Returns 

4333 ------- 

4334 numpy.ndarray 

4335 Integer indices that would sort the index if used as 

4336 an indexer. 

4337 

4338 See Also 

4339 -------- 

4340 numpy.argsort : Similar method for NumPy arrays. 

4341 Index.sort_values : Return sorted copy of Index. 

4342 

4343 Examples 

4344 -------- 

4345 >>> idx = pd.Index(['b', 'a', 'd', 'c']) 

4346 >>> idx 

4347 Index(['b', 'a', 'd', 'c'], dtype='object') 

4348 

4349 >>> order = idx.argsort() 

4350 >>> order 

4351 array([1, 0, 3, 2]) 

4352 

4353 >>> idx[order] 

4354 Index(['a', 'b', 'c', 'd'], dtype='object') 

4355 """ 

4356 result = self.asi8 

4357 if result is None: 

4358 result = np.array(self) 

4359 return result.argsort(*args, **kwargs) 

4360 

    # Shared docstring consumed by get_value below (and by subclasses via
    # @Appender); %-placeholders are filled from _index_doc_kwargs.
    _index_shared_docs[
        "get_value"
    ] = """
        Fast lookup of value from 1-dimensional ndarray. Only use this if you
        know what you're doing.

        Returns
        -------
        scalar
            A value in the Series with the index of the key value in self.
        """

4372 

    @Appender(_index_shared_docs["get_value"] % _index_doc_kwargs)
    def get_value(self, series, key):

        # if we have something that is Index-like, then
        # use this, e.g. DatetimeIndex
        # Things like `Series._get_value` (via .at) pass the EA directly here.
        s = extract_array(series, extract_numpy=True)
        if isinstance(s, ExtensionArray):
            if is_scalar(key):
                # GH 20882, 21257
                # First try to convert the key to a location
                # If that fails, raise a KeyError if an integer
                # index, otherwise, see if key is an integer, and
                # try that
                try:
                    iloc = self.get_loc(key)
                    return s[iloc]
                except KeyError:
                    if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
                        # Label lookup is authoritative here; do not fall back
                        # to positional indexing.
                        raise
                    elif is_integer(key):
                        # Positional fallback for non-integer-holding indexes.
                        return s[key]
                    # Otherwise fall through to the engine-based path below.
            else:
                # if key is not a scalar, directly raise an error (the code below
                # would convert to numpy arrays and raise later any way) - GH29926
                raise InvalidIndexError(key)

        s = com.values_from_object(series)
        k = com.values_from_object(key)

        k = self._convert_scalar_indexer(k, kind="getitem")
        try:
            return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
        except KeyError as e1:
            if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
                raise

            # Last-resort positional lookup into the underlying values.
            try:
                return libindex.get_value_at(s, key)
            except IndexError:
                raise
            except TypeError:
                # generator/iterator-like
                if is_iterator(key):
                    raise InvalidIndexError(key)
                else:
                    # Re-raise the original KeyError from the engine lookup.
                    raise e1
            except Exception:
                raise e1
        except TypeError:
            # e.g. "[False] is an invalid key"
            if is_scalar(key):
                raise IndexError(key)
            raise InvalidIndexError(key)

4427 

4428 def set_value(self, arr, key, value): 

4429 """ 

4430 Fast lookup of value from 1-dimensional ndarray. 

4431 

4432 .. deprecated:: 1.0 

4433 

4434 Notes 

4435 ----- 

4436 Only use this if you know what you're doing. 

4437 """ 

4438 warnings.warn( 

4439 ( 

4440 "The 'set_value' method is deprecated, and " 

4441 "will be removed in a future version." 

4442 ), 

4443 FutureWarning, 

4444 stacklevel=2, 

4445 ) 

4446 self._engine.set_value( 

4447 com.values_from_object(arr), com.values_from_object(key), value 

4448 ) 

4449 

    # Shared docstring consumed by get_indexer_non_unique below (and by
    # subclasses via @Appender); %(target_klass)s comes from _index_doc_kwargs.
    _index_shared_docs[
        "get_indexer_non_unique"
    ] = """
        Compute indexer and mask for new index given the current index. The
        indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s

        Returns
        -------
        indexer : ndarray of int
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        missing : ndarray of int
            An indexer into the target of the values not found.
            These correspond to the -1 in the indexer array.
        """

4471 

4472 @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) 

4473 def get_indexer_non_unique(self, target): 

4474 target = ensure_index(target) 

4475 pself, ptarget = self._maybe_promote(target) 

4476 if pself is not self or ptarget is not target: 

4477 return pself.get_indexer_non_unique(ptarget) 

4478 

4479 if is_categorical(target): 

4480 tgt_values = np.asarray(target) 

4481 elif self.is_all_dates and target.is_all_dates: # GH 30399 

4482 tgt_values = target.asi8 

4483 else: 

4484 tgt_values = target._ndarray_values 

4485 

4486 indexer, missing = self._engine.get_indexer_non_unique(tgt_values) 

4487 return ensure_platform_int(indexer), missing 

4488 

4489 def get_indexer_for(self, target, **kwargs): 

4490 """ 

4491 Guaranteed return of an indexer even when non-unique. 

4492 

4493 This dispatches to get_indexer or get_indexer_non_unique 

4494 as appropriate. 

4495 

4496 Returns 

4497 ------- 

4498 numpy.ndarray 

4499 List of indices. 

4500 """ 

4501 if self.is_unique: 

4502 return self.get_indexer(target, **kwargs) 

4503 indexer, _ = self.get_indexer_non_unique(target, **kwargs) 

4504 return indexer 

4505 

4506 def _maybe_promote(self, other): 

4507 # A hack, but it works 

4508 

4509 if self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex): 

4510 return type(other)(self), other 

4511 elif self.inferred_type == "boolean": 

4512 if not is_object_dtype(self.dtype): 

4513 return self.astype("object"), other.astype("object") 

4514 return self, other 

4515 

4516 def groupby(self, values) -> Dict[Hashable, np.ndarray]: 

4517 """ 

4518 Group the index labels by a given array of values. 

4519 

4520 Parameters 

4521 ---------- 

4522 values : array 

4523 Values used to determine the groups. 

4524 

4525 Returns 

4526 ------- 

4527 dict 

4528 {group name -> group labels} 

4529 """ 

4530 

4531 # TODO: if we are a MultiIndex, we can do better 

4532 # that converting to tuples 

4533 if isinstance(values, ABCMultiIndex): 

4534 values = values.values 

4535 values = ensure_categorical(values) 

4536 result = values._reverse_indexer() 

4537 

4538 # map to the label 

4539 result = {k: self.take(v) for k, v in result.items()} 

4540 

4541 return result 

4542 

4543 def map(self, mapper, na_action=None): 

4544 """ 

4545 Map values using input correspondence (a dict, Series, or function). 

4546 

4547 Parameters 

4548 ---------- 

4549 mapper : function, dict, or Series 

4550 Mapping correspondence. 

4551 na_action : {None, 'ignore'} 

4552 If 'ignore', propagate NA values, without passing them to the 

4553 mapping correspondence. 

4554 

4555 Returns 

4556 ------- 

4557 applied : Union[Index, MultiIndex], inferred 

4558 The output of the mapping function applied to the index. 

4559 If the function returns a tuple with more than one element 

4560 a MultiIndex will be returned. 

4561 """ 

4562 

4563 from pandas.core.indexes.multi import MultiIndex 

4564 

4565 new_values = super()._map_values(mapper, na_action=na_action) 

4566 

4567 attributes = self._get_attributes_dict() 

4568 

4569 # we can return a MultiIndex 

4570 if new_values.size and isinstance(new_values[0], tuple): 

4571 if isinstance(self, MultiIndex): 

4572 names = self.names 

4573 elif attributes.get("name"): 

4574 names = [attributes.get("name")] * len(new_values[0]) 

4575 else: 

4576 names = None 

4577 return MultiIndex.from_tuples(new_values, names=names) 

4578 

4579 attributes["copy"] = False 

4580 if not new_values.size: 

4581 # empty 

4582 attributes["dtype"] = self.dtype 

4583 

4584 return Index(new_values, **attributes) 

4585 

4586 def isin(self, values, level=None): 

4587 """ 

4588 Return a boolean array where the index values are in `values`. 

4589 

4590 Compute boolean array of whether each index value is found in the 

4591 passed set of values. The length of the returned boolean array matches 

4592 the length of the index. 

4593 

4594 Parameters 

4595 ---------- 

4596 values : set or list-like 

4597 Sought values. 

4598 level : str or int, optional 

4599 Name or position of the index level to use (if the index is a 

4600 `MultiIndex`). 

4601 

4602 Returns 

4603 ------- 

4604 is_contained : ndarray 

4605 NumPy array of boolean values. 

4606 

4607 See Also 

4608 -------- 

4609 Series.isin : Same for Series. 

4610 DataFrame.isin : Same method for DataFrames. 

4611 

4612 Notes 

4613 ----- 

4614 In the case of `MultiIndex` you must either specify `values` as a 

4615 list-like object containing tuples that are the same length as the 

4616 number of levels, or specify `level`. Otherwise it will raise a 

4617 ``ValueError``. 

4618 

4619 If `level` is specified: 

4620 

4621 - if it is the name of one *and only one* index level, use that level; 

4622 - otherwise it should be a number indicating level position. 

4623 

4624 Examples 

4625 -------- 

4626 >>> idx = pd.Index([1,2,3]) 

4627 >>> idx 

4628 Int64Index([1, 2, 3], dtype='int64') 

4629 

4630 Check whether each index value in a list of values. 

4631 >>> idx.isin([1, 4]) 

4632 array([ True, False, False]) 

4633 

4634 >>> midx = pd.MultiIndex.from_arrays([[1,2,3], 

4635 ... ['red', 'blue', 'green']], 

4636 ... names=('number', 'color')) 

4637 >>> midx 

4638 MultiIndex(levels=[[1, 2, 3], ['blue', 'green', 'red']], 

4639 codes=[[0, 1, 2], [2, 0, 1]], 

4640 names=['number', 'color']) 

4641 

4642 Check whether the strings in the 'color' level of the MultiIndex 

4643 are in a list of colors. 

4644 

4645 >>> midx.isin(['red', 'orange', 'yellow'], level='color') 

4646 array([ True, False, False]) 

4647 

4648 To check across the levels of a MultiIndex, pass a list of tuples: 

4649 

4650 >>> midx.isin([(1, 'red'), (3, 'red')]) 

4651 array([ True, False, False]) 

4652 

4653 For a DatetimeIndex, string values in `values` are converted to 

4654 Timestamps. 

4655 

4656 >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13'] 

4657 >>> dti = pd.to_datetime(dates) 

4658 >>> dti 

4659 DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'], 

4660 dtype='datetime64[ns]', freq=None) 

4661 

4662 >>> dti.isin(['2000-03-11']) 

4663 array([ True, False, False]) 

4664 """ 

4665 if level is not None: 

4666 self._validate_index_level(level) 

4667 return algos.isin(self, values) 

4668 

4669 def _get_string_slice(self, key, use_lhs=True, use_rhs=True): 

4670 # this is for partial string indexing, 

4671 # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex 

4672 raise NotImplementedError 

4673 

4674 def slice_indexer(self, start=None, end=None, step=None, kind=None): 

4675 """ 

4676 For an ordered or unique index, compute the slice indexer for input 

4677 labels and step. 

4678 

4679 Parameters 

4680 ---------- 

4681 start : label, default None 

4682 If None, defaults to the beginning. 

4683 end : label, default None 

4684 If None, defaults to the end. 

4685 step : int, default None 

4686 kind : str, default None 

4687 

4688 Returns 

4689 ------- 

4690 indexer : slice 

4691 

4692 Raises 

4693 ------ 

4694 KeyError : If key does not exist, or key is not unique and index is 

4695 not ordered. 

4696 

4697 Notes 

4698 ----- 

4699 This function assumes that the data is sorted, so use at your own peril 

4700 

4701 Examples 

4702 -------- 

4703 This is a method on all index types. For example you can do: 

4704 

4705 >>> idx = pd.Index(list('abcd')) 

4706 >>> idx.slice_indexer(start='b', end='c') 

4707 slice(1, 3) 

4708 

4709 >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')]) 

4710 >>> idx.slice_indexer(start='b', end=('c', 'g')) 

4711 slice(1, 3) 

4712 """ 

4713 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind) 

4714 

4715 # return a slice 

4716 if not is_scalar(start_slice): 

4717 raise AssertionError("Start slice bound is non-scalar") 

4718 if not is_scalar(end_slice): 

4719 raise AssertionError("End slice bound is non-scalar") 

4720 

4721 return slice(start_slice, end_slice, step) 

4722 

4723 def _maybe_cast_indexer(self, key): 

4724 """ 

4725 If we have a float key and are not a floating index, then try to cast 

4726 to an int if equivalent. 

4727 """ 

4728 

4729 if is_float(key) and not self.is_floating(): 

4730 try: 

4731 ckey = int(key) 

4732 if ckey == key: 

4733 key = ckey 

4734 except (OverflowError, ValueError, TypeError): 

4735 pass 

4736 return key 

4737 

4738 def _validate_indexer(self, form, key, kind): 

4739 """ 

4740 If we are positional indexer, validate that we have appropriate 

4741 typed bounds must be an integer. 

4742 """ 

4743 assert kind in ["ix", "loc", "getitem", "iloc"] 

4744 

4745 if key is None: 

4746 pass 

4747 elif is_integer(key): 

4748 pass 

4749 elif kind in ["iloc", "getitem"]: 

4750 self._invalid_indexer(form, key) 

4751 return key 

4752 

    # Shared docstring consumed by _maybe_cast_slice_bound below (and by
    # subclasses via @Appender).
    _index_shared_docs[
        "_maybe_cast_slice_bound"
    ] = """
        This function should be overloaded in subclasses that allow non-trivial
        casting on label-slice bounds, e.g. datetime-like indices allowing
        strings containing formatted datetimes.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'ix', 'loc', 'getitem'}

        Returns
        -------
        label : object

        Notes
        -----
        Value of `side` parameter should be validated in caller.
        """

4774 

4775 @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) 

4776 def _maybe_cast_slice_bound(self, label, side, kind): 

4777 assert kind in ["ix", "loc", "getitem", None] 

4778 

4779 # We are a plain index here (sub-class override this method if they 

4780 # wish to have special treatment for floats/ints, e.g. Float64Index and 

4781 # datetimelike Indexes 

4782 # reject them 

4783 if is_float(label): 

4784 if not (kind in ["ix"] and (self.holds_integer() or self.is_floating())): 

4785 self._invalid_indexer("slice", label) 

4786 

4787 # we are trying to find integer bounds on a non-integer based index 

4788 # this is rejected (generally .loc gets you here) 

4789 elif is_integer(label): 

4790 self._invalid_indexer("slice", label) 

4791 

4792 return label 

4793 

4794 def _searchsorted_monotonic(self, label, side="left"): 

4795 if self.is_monotonic_increasing: 

4796 return self.searchsorted(label, side=side) 

4797 elif self.is_monotonic_decreasing: 

4798 # np.searchsorted expects ascending sort order, have to reverse 

4799 # everything for it to work (element ordering, search side and 

4800 # resulting value). 

4801 pos = self[::-1].searchsorted( 

4802 label, side="right" if side == "left" else "left" 

4803 ) 

4804 return len(self) - pos 

4805 

4806 raise ValueError("index must be monotonic increasing or decreasing") 

4807 

    def get_slice_bound(self, label, side, kind):
        """
        Calculate slice bound that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
        of given label.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'ix', 'loc', 'getitem'}

        Returns
        -------
        int
            Index of label.

        Raises
        ------
        KeyError
            If the label's positions are non-contiguous (non-unique label not
            representable by a slice), or absent with no monotonic fallback.
        """
        assert kind in ["ix", "loc", "getitem", None]

        if side not in ("left", "right"):
            raise ValueError(
                f"Invalid value for side kwarg, must be either"
                f" 'left' or 'right': {side}"
            )

        original_label = label

        # For datetime indices label may be a string that has to be converted
        # to datetime boundary according to its resolution.
        label = self._maybe_cast_slice_bound(label, side, kind)

        # we need to look up the label
        try:
            slc = self.get_loc(label)
        except KeyError as err:
            try:
                # Label absent: fall back to its sorted insertion point.
                return self._searchsorted_monotonic(label, side)
            except ValueError:
                # raise the original KeyError
                raise err

        if isinstance(slc, np.ndarray):
            # get_loc may return a boolean array or an array of indices, which
            # is OK as long as they are representable by a slice.
            if is_bool_dtype(slc):
                slc = lib.maybe_booleans_to_slice(slc.view("u1"))
            else:
                slc = lib.maybe_indices_to_slice(slc.astype("i8"), len(self))
            if isinstance(slc, np.ndarray):
                # Still an array: the label occupies non-contiguous positions.
                raise KeyError(
                    f"Cannot get {side} slice bound for non-unique "
                    f"label: {repr(original_label)}"
                )

        if isinstance(slc, slice):
            if side == "left":
                return slc.start
            else:
                return slc.stop
        else:
            # Scalar position: the right bound is one past it.
            if side == "right":
                return slc + 1
            else:
                return slc

4873 

    def slice_locs(self, start=None, end=None, step=None, kind=None):
        """
        Compute slice locations for input labels.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning.
        end : label, default None
            If None, defaults to the end.
        step : int, defaults None
            If None, defaults to 1.
        kind : {'ix', 'loc', 'getitem'} or None

        Returns
        -------
        start, end : int

        See Also
        --------
        Index.get_loc : Get location for a single label.

        Notes
        -----
        This method only works if the index is monotonic or unique.

        Examples
        --------
        >>> idx = pd.Index(list('abcd'))
        >>> idx.slice_locs(start='b', end='c')
        (1, 3)
        """
        inc = step is None or step >= 0

        if not inc:
            # If it's a reverse slice, temporarily swap bounds.
            start, end = end, start

        # GH 16785: If start and end happen to be date strings with UTC offsets
        # attempt to parse and check that the offsets are the same
        if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
            try:
                ts_start = Timestamp(start)
                ts_end = Timestamp(end)
            except (ValueError, TypeError):
                # Not parseable as timestamps: skip the offset consistency check.
                pass
            else:
                if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
                    raise ValueError("Both dates must have the same UTC offset")

        # Missing bounds default to the full extent of the index.
        start_slice = None
        if start is not None:
            start_slice = self.get_slice_bound(start, "left", kind)
        if start_slice is None:
            start_slice = 0

        end_slice = None
        if end is not None:
            end_slice = self.get_slice_bound(end, "right", kind)
        if end_slice is None:
            end_slice = len(self)

        if not inc:
            # Bounds at this moment are swapped, swap them back and shift by 1.
            #
            # slice_locs('B', 'A', step=-1): s='B', e='A'
            #
            #              s='A'                 e='B'
            # AFTER SWAP:    |                     |
            #                v ------------------> V
            #           -----------------------------------
            #           | | |A|A|A|A| | | | | |B|B| | | | |
            #           -----------------------------------
            #              ^ <------------------ ^
            # SHOULD BE:   |                     |
            #           end=s-1              start=e-1
            #
            end_slice, start_slice = start_slice - 1, end_slice - 1

            # i == -1 triggers ``len(self) + i`` selection that points to the
            # last element, not before-the-first one, subtracting len(self)
            # compensates that.
            if end_slice == -1:
                end_slice -= len(self)
            if start_slice == -1:
                start_slice -= len(self)

        return start_slice, end_slice

4962 

4963 def delete(self, loc): 

4964 """ 

4965 Make new Index with passed location(-s) deleted. 

4966 

4967 Returns 

4968 ------- 

4969 new_index : Index 

4970 """ 

4971 return self._shallow_copy(np.delete(self._data, loc)) 

4972 

4973 def insert(self, loc, item): 

4974 """ 

4975 Make new Index inserting new item at location. 

4976 

4977 Follows Python list.append semantics for negative values. 

4978 

4979 Parameters 

4980 ---------- 

4981 loc : int 

4982 item : object 

4983 

4984 Returns 

4985 ------- 

4986 new_index : Index 

4987 """ 

4988 _self = np.asarray(self) 

4989 item = self._coerce_scalar_to_index(item)._ndarray_values 

4990 idx = np.concatenate((_self[:loc], item, _self[loc:])) 

4991 return self._shallow_copy_with_infer(idx) 

4992 

4993 def drop(self, labels, errors="raise"): 

4994 """ 

4995 Make new Index with passed list of labels deleted. 

4996 

4997 Parameters 

4998 ---------- 

4999 labels : array-like 

5000 errors : {'ignore', 'raise'}, default 'raise' 

5001 If 'ignore', suppress error and existing labels are dropped. 

5002 

5003 Returns 

5004 ------- 

5005 dropped : Index 

5006 

5007 Raises 

5008 ------ 

5009 KeyError 

5010 If not all of the labels are found in the selected axis 

5011 """ 

5012 arr_dtype = "object" if self.dtype == "object" else None 

5013 labels = com.index_labels_to_array(labels, dtype=arr_dtype) 

5014 indexer = self.get_indexer(labels) 

5015 mask = indexer == -1 

5016 if mask.any(): 

5017 if errors != "ignore": 

5018 raise KeyError(f"{labels[mask]} not found in axis") 

5019 indexer = indexer[~mask] 

5020 return self.delete(indexer) 

5021 

5022 # -------------------------------------------------------------------- 

5023 # Generated Arithmetic, Comparison, and Unary Methods 

5024 

5025 @classmethod 

5026 def _add_comparison_methods(cls): 

5027 """ 

5028 Add in comparison methods. 

5029 """ 

5030 cls.__eq__ = _make_comparison_op(operator.eq, cls) 

5031 cls.__ne__ = _make_comparison_op(operator.ne, cls) 

5032 cls.__lt__ = _make_comparison_op(operator.lt, cls) 

5033 cls.__gt__ = _make_comparison_op(operator.gt, cls) 

5034 cls.__le__ = _make_comparison_op(operator.le, cls) 

5035 cls.__ge__ = _make_comparison_op(operator.ge, cls) 

5036 

5037 @classmethod 

5038 def _add_numeric_methods_add_sub_disabled(cls): 

5039 """ 

5040 Add in the numeric add/sub methods to disable. 

5041 """ 

5042 cls.__add__ = make_invalid_op("__add__") 

5043 cls.__radd__ = make_invalid_op("__radd__") 

5044 cls.__iadd__ = make_invalid_op("__iadd__") 

5045 cls.__sub__ = make_invalid_op("__sub__") 

5046 cls.__rsub__ = make_invalid_op("__rsub__") 

5047 cls.__isub__ = make_invalid_op("__isub__") 

5048 

@classmethod
def _add_numeric_methods_disabled(cls):
    """
    Add in numeric methods to disable other than add/sub.

    Installs ``make_invalid_op`` stubs for every remaining arithmetic
    and unary numeric dunder.
    """
    for dunder in (
        "__pow__",
        "__rpow__",
        "__mul__",
        "__rmul__",
        "__floordiv__",
        "__rfloordiv__",
        "__truediv__",
        "__rtruediv__",
        "__mod__",
        "__divmod__",
        "__neg__",
        "__pos__",
        "__abs__",
        "__inv__",
    ):
        setattr(cls, dunder, make_invalid_op(dunder))

5068 

@classmethod
def _add_numeric_methods_binary(cls):
    """
    Attach the binary arithmetic dunder methods to ``cls``.

    Each dunder is produced by ``_make_arithmetic_op``; reflected
    variants use the helpers from ``ops``.
    """
    # TODO: rmod? rdivmod?
    dunder_ops = {
        "__add__": operator.add,
        "__radd__": ops.radd,
        "__sub__": operator.sub,
        "__rsub__": ops.rsub,
        "__rpow__": ops.rpow,
        "__pow__": operator.pow,
        "__truediv__": operator.truediv,
        "__rtruediv__": ops.rtruediv,
        "__mod__": operator.mod,
        "__floordiv__": operator.floordiv,
        "__rfloordiv__": ops.rfloordiv,
        "__divmod__": divmod,
        "__mul__": operator.mul,
        "__rmul__": ops.rmul,
    }
    for dunder, op in dunder_ops.items():
        setattr(cls, dunder, _make_arithmetic_op(op, cls))

5091 

@classmethod
def _add_numeric_methods_unary(cls):
    """
    Attach the unary numeric dunder methods to ``cls``.
    """

    def _unary_factory(op, opstr):
        # Wrap ``op`` so its result is re-boxed as an Index carrying
        # over this index's attributes (e.g. ``name``).
        def _unary_method(self):
            attrs = self._get_attributes_dict()
            return Index(op(self.values), **attrs)

        _unary_method.__name__ = opstr
        return _unary_method

    cls.__neg__ = _unary_factory(operator.neg, "__neg__")
    cls.__pos__ = _unary_factory(operator.pos, "__pos__")
    cls.__abs__ = _unary_factory(np.abs, "__abs__")
    cls.__inv__ = _unary_factory(lambda x: -x, "__inv__")

5111 

@classmethod
def _add_numeric_methods(cls):
    # Attach the full set of numeric dunders (unary + binary) to ``cls``;
    # called by numeric Index subclasses at class-definition time.
    cls._add_numeric_methods_unary()
    cls._add_numeric_methods_binary()

5116 

@classmethod
def _add_logical_methods(cls):
    """
    Add in logical methods.

    Builds ``all`` and ``any`` for ``cls`` from a shared factory; the
    docstrings are assembled at runtime from the templates below via
    ``Substitution``/``Appender``.
    """
    # Shared docstring template; ``%(outname)s``/``%(desc)s`` are filled
    # in per-method by the Substitution decorator below.
    _doc = """
    %(desc)s

    Parameters
    ----------
    *args
        These parameters will be passed to numpy.%(outname)s.
    **kwargs
        These parameters will be passed to numpy.%(outname)s.

    Returns
    -------
    %(outname)s : bool or array_like (if axis is specified)
        A single element array_like may be converted to bool."""

    # Per-method doc sections, stored in the module-level shared-docs dict
    # so subclasses can reuse them.
    _index_shared_docs["index_all"] = dedent(
        """
    See Also
    --------
    Index.any : Return whether any element in an Index is True.
    Series.any : Return whether any element in a Series is True.
    Series.all : Return whether all elements in a Series are True.

    Notes
    -----
    Not a Number (NaN), positive infinity and negative infinity
    evaluate to True because these are not equal to zero.

    Examples
    --------
    **all**

    True, because nonzero integers are considered True.

    >>> pd.Index([1, 2, 3]).all()
    True

    False, because ``0`` is considered False.

    >>> pd.Index([0, 1, 2]).all()
    False

    **any**

    True, because ``1`` is considered True.

    >>> pd.Index([0, 0, 1]).any()
    True

    False, because ``0`` is considered False.

    >>> pd.Index([0, 0, 0]).any()
    False
    """
    )

    _index_shared_docs["index_any"] = dedent(
        """
    See Also
    --------
    Index.all : Return whether all elements are True.
    Series.all : Return whether all elements are True.

    Notes
    -----
    Not a Number (NaN), positive infinity and negative infinity
    evaluate to True because these are not equal to zero.

    Examples
    --------
    >>> index = pd.Index([0, 1, 2])
    >>> index.any()
    True

    >>> index = pd.Index([0, 0, 0])
    >>> index.any()
    False
    """
    )

    # Factory: wrap a numpy reduction ``f`` (np.all / np.any) as an Index
    # method with the docstring assembled from the templates above.
    def _make_logical_function(name, desc, f):
        @Substitution(outname=name, desc=desc)
        @Appender(_index_shared_docs["index_" + name])
        @Appender(_doc)
        def logical_func(self, *args, **kwargs):
            result = f(self.values)
            if (
                isinstance(result, (np.ndarray, ABCSeries, Index))
                and result.ndim == 0
            ):
                # return NumPy type (unbox a 0-d result to a scalar)
                return result.dtype.type(result.item())
            else:  # pragma: no cover
                return result

        logical_func.__name__ = name
        return logical_func

    cls.all = _make_logical_function(
        "all", "Return whether all elements are True.", np.all
    )
    cls.any = _make_logical_function(
        "any", "Return whether any element is True.", np.any
    )

5228 

@classmethod
def _add_logical_methods_disabled(cls):
    """
    Install ``all``/``any`` stubs that raise instead of reducing.
    """
    for name in ("all", "any"):
        setattr(cls, name, make_invalid_op(name))

5236 

@property
def shape(self):
    """
    Return a tuple of the shape of the underlying data.

    For a typical one-dimensional Index this is ``(len(index),)``.
    """
    # not using "(len(self), )" to return "correct" shape if the values
    # consists of a >1 D array (see GH-27775)
    # overridden in MultiIndex.shape to avoid materializing the values
    return self._values.shape

5246 

5247 

# Attach the default method sets to the base Index class at import time:
# numeric dunders are disabled here (numeric subclasses re-enable them),
# while the logical (all/any) and comparison methods are installed.
Index._add_numeric_methods_disabled()
Index._add_logical_methods()
Index._add_comparison_methods()

5251 

5252 

def ensure_index_from_sequences(sequences, names=None):
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Many sequences returns a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=['name'])
    Int64Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([['a', 'a'], ['a', 'b']],
                                    names=['L1', 'L2'])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from pandas.core.indexes.multi import MultiIndex

    # Multiple sequences -> one MultiIndex level per sequence.
    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)

    # Single sequence -> flat Index; unwrap the (single) name if given.
    name = names[0] if names is not None else None
    return Index(sequences[0], name=name)

5292 

5293 

def ensure_index(index_like, copy=False):
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False
        Whether to copy ``index_like`` when it is already index-shaped.

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               )

    See Also
    --------
    ensure_index_from_sequences
    """
    if isinstance(index_like, Index):
        if copy:
            index_like = index_like.copy()
        return index_like
    if hasattr(index_like, "name"):
        # e.g. a Series: propagate its name onto the new Index
        return Index(index_like, name=index_like.name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    # must check for exactly list here because of strict type
    # check in clean_index_list
    if isinstance(index_like, list):
        if type(index_like) != list:
            # list subclass: normalize to an exact list for clean_index_list
            index_like = list(index_like)

        converted, all_arrays = lib.clean_index_list(index_like)

        if len(converted) > 0 and all_arrays:
            # a non-empty list of equal-length arrays -> MultiIndex
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex.from_arrays(converted)
        else:
            index_like = converted
    else:
        # clean_index_list does the equivalent of copying
        # so only need to do this if not list instance
        if copy:
            # import under a distinct name so the ``copy`` *argument*
            # is not shadowed by the ``copy`` *function*
            from copy import copy as copy_func

            index_like = copy_func(index_like)

    return Index(index_like)

5359 

5360 

5361def _ensure_has_len(seq): 

5362 """ 

5363 If seq is an iterator, put its values into a list. 

5364 """ 

5365 try: 

5366 len(seq) 

5367 except TypeError: 

5368 return list(seq) 

5369 else: 

5370 return seq 

5371 

5372 

5373def _trim_front(strings): 

5374 """ 

5375 Trims zeros and decimal points. 

5376 """ 

5377 trimmed = strings 

5378 while len(strings) > 0 and all(x[0] == " " for x in trimmed): 

5379 trimmed = [x[1:] for x in trimmed] 

5380 return trimmed 

5381 

5382 

5383def _validate_join_method(method): 

5384 if method not in ["left", "right", "inner", "outer"]: 

5385 raise ValueError(f"do not recognize join method {method}") 

5386 

5387 

def default_index(n):
    """
    Return the default (unnamed) index for a length-``n`` object.
    """
    from pandas.core.indexes.range import RangeIndex

    return RangeIndex(start=0, stop=n, name=None)

5392 

5393 

def maybe_extract_name(name, obj, cls) -> Optional[Hashable]:
    """
    If no name is passed, then extract it from data, validating hashability.

    Parameters
    ----------
    name : Hashable or None
    obj : object
        The data the index is being built from.
    cls : type
        The Index subclass, used only for the error message.

    Returns
    -------
    Hashable or None

    Raises
    ------
    TypeError
        If the resolved name is not hashable (GH#29069).
    """
    # Only fall back to obj.name for Index/Series; testing for a generic
    # "name" attribute would wrongly pick up e.g. ``dtype.name``.
    if name is None and isinstance(obj, (Index, ABCSeries)):
        name = obj.name

    if not is_hashable(name):
        raise TypeError(f"{cls.__name__}.name must be a hashable type")

    return name

5408 

5409 

def _maybe_cast_with_dtype(data: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
    """
    If a dtype is passed, cast to the closest matching dtype that is supported
    by Index.

    Parameters
    ----------
    data : np.ndarray
    dtype : np.dtype
    copy : bool

    Returns
    -------
    np.ndarray
    """
    # we need to avoid having numpy coerce
    # things that look like ints/floats to ints unless
    # they are actually ints, e.g. '0' and 0.0
    # should not be coerced
    # GH 11836
    if is_integer_dtype(dtype):
        inferred = lib.infer_dtype(data, skipna=False)
        if inferred == "integer":
            data = maybe_cast_to_integer_array(data, dtype, copy=copy)
        elif inferred in ["floating", "mixed-integer-float"]:
            # floats can only become ints if no NaNs are present
            if isna(data).any():
                raise ValueError("cannot convert float NaN to integer")

            if inferred == "mixed-integer-float":
                data = maybe_cast_to_integer_array(data, dtype)

            # If we are actually all equal to integers,
            # then coerce to integer.
            try:
                data = _try_convert_to_int_array(data, copy, dtype)
            except ValueError:
                # not losslessly representable as ints; fall back to float64
                data = np.array(data, dtype=np.float64, copy=copy)

        elif inferred == "string":
            # leave strings as-is rather than coercing them to integers
            pass
        else:
            data = data.astype(dtype)
    elif is_float_dtype(dtype):
        inferred = lib.infer_dtype(data, skipna=False)
        if inferred == "string":
            # leave strings as-is rather than coercing them to floats
            pass
        else:
            data = data.astype(dtype)
    else:
        # all other dtypes: let numpy perform the cast directly
        data = np.array(data, dtype=dtype, copy=copy)

    return data

5462 

5463 

def _maybe_cast_data_without_dtype(subarr):
    """
    If we have an arraylike input but no passed dtype, try to infer
    a supported dtype.

    Parameters
    ----------
    subarr : np.ndarray, Index, or Series

    Returns
    -------
    converted : np.ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
    """
    # Runtime import needed bc IntervalArray imports Index
    from pandas.core.arrays import (
        IntervalArray,
        PeriodArray,
        DatetimeArray,
        TimedeltaArray,
    )

    inferred = lib.infer_dtype(subarr, skipna=False)

    if inferred == "integer":
        try:
            data = _try_convert_to_int_array(subarr, False, None)
            return data, data.dtype
        except ValueError:
            # values not losslessly representable as int64/uint64
            # (e.g. overflow); keep them as object below
            pass

        return subarr, object

    elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
        # TODO: Returns IntegerArray for integer-na case in the future
        return subarr, np.float64

    elif inferred == "interval":
        try:
            data = IntervalArray._from_sequence(subarr, copy=False)
            return data, data.dtype
        except ValueError:
            # GH27172: mixed closed Intervals --> object dtype
            pass
    elif inferred == "boolean":
        # don't support boolean explicitly ATM
        pass
    elif inferred != "string":
        if inferred.startswith("datetime"):
            try:
                data = DatetimeArray._from_sequence(subarr, copy=False)
                return data, data.dtype
            except (ValueError, OutOfBoundsDatetime):
                # GH 27011
                # If we have mixed timezones, just send it
                # down the base constructor
                pass

        elif inferred.startswith("timedelta"):
            data = TimedeltaArray._from_sequence(subarr, copy=False)
            return data, data.dtype
        elif inferred == "period":
            try:
                data = PeriodArray._from_sequence(subarr)
                return data, data.dtype
            except IncompatibleFrequency:
                # mismatched period frequencies; fall through unchanged
                pass

    # default: no supported specialization found; return input unchanged
    return subarr, subarr.dtype

5533 

5534 

5535def _try_convert_to_int_array( 

5536 data: np.ndarray, copy: bool, dtype: np.dtype 

5537) -> np.ndarray: 

5538 """ 

5539 Attempt to convert an array of data into an integer array. 

5540 

5541 Parameters 

5542 ---------- 

5543 data : The data to convert. 

5544 copy : bool 

5545 Whether to copy the data or not. 

5546 dtype : np.dtype 

5547 

5548 Returns 

5549 ------- 

5550 int_array : data converted to either an ndarray[int64] or ndarray[uint64] 

5551 

5552 Raises 

5553 ------ 

5554 ValueError if the conversion was not successful. 

5555 """ 

5556 

5557 if not is_unsigned_integer_dtype(dtype): 

5558 # skip int64 conversion attempt if uint-like dtype is passed, as 

5559 # this could return Int64Index when UInt64Index is what's desired 

5560 try: 

5561 res = data.astype("i8", copy=False) 

5562 if (res == data).all(): 

5563 return res # TODO: might still need to copy 

5564 except (OverflowError, TypeError, ValueError): 

5565 pass 

5566 

5567 # Conversion to int64 failed (possibly due to overflow) or was skipped, 

5568 # so let's try now with uint64. 

5569 try: 

5570 res = data.astype("u8", copy=False) 

5571 if (res == data).all(): 

5572 return res # TODO: might still need to copy 

5573 except (OverflowError, TypeError, ValueError): 

5574 pass 

5575 

5576 raise ValueError