Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import numpy as np 

2 

3from pandas._libs import index as libindex, lib 

4from pandas._typing import Dtype 

5from pandas.util._decorators import Appender, cache_readonly 

6 

7from pandas.core.dtypes.cast import astype_nansafe 

8from pandas.core.dtypes.common import ( 

9 is_bool, 

10 is_bool_dtype, 

11 is_dtype_equal, 

12 is_extension_array_dtype, 

13 is_float, 

14 is_float_dtype, 

15 is_integer_dtype, 

16 is_scalar, 

17 is_signed_integer_dtype, 

18 is_unsigned_integer_dtype, 

19 needs_i8_conversion, 

20 pandas_dtype, 

21) 

22from pandas.core.dtypes.generic import ( 

23 ABCFloat64Index, 

24 ABCInt64Index, 

25 ABCRangeIndex, 

26 ABCSeries, 

27 ABCUInt64Index, 

28) 

29from pandas.core.dtypes.missing import isna 

30 

31from pandas.core import algorithms 

32import pandas.core.common as com 

33from pandas.core.indexes.base import ( 

34 Index, 

35 InvalidIndexError, 

36 _index_shared_docs, 

37 maybe_extract_name, 

38) 

39from pandas.core.ops import get_op_result_name 

40 

41_num_index_shared_docs = dict() 

42 

43 

class NumericIndex(Index):
    """
    Shared behaviour for indexes whose labels are numbers.

    The class is abstract: ``__new__`` and ``_validate_dtype`` read
    ``cls._default_dtype`` and ``cls._typ``, which this class does not define.
    """

    _is_numeric_dtype = True

    def __new__(cls, data=None, dtype=None, copy=False, name=None):
        """
        Build an index whose values are stored as ``cls._default_dtype``.

        Parameters
        ----------
        data : ndarray, Index, Series, list, tuple or other iterable
            Scalars are rejected with the error built by ``_scalar_data_error``.
        dtype : dtype, optional
            Checked by ``_validate_dtype``; also used when non-array input is
            converted to an ndarray.
        copy : bool, default False
            Force a copy of the data.
        name : object, optional
            Resolved through ``maybe_extract_name`` together with ``data``.

        Raises
        ------
        ValueError
            If ``dtype`` is rejected or the data has more than one dimension.
        """
        cls._validate_dtype(dtype)

        if not isinstance(data, (np.ndarray, Index)):
            # Not array-like yet: turn it into an ndarray first.
            if is_scalar(data):
                raise cls._scalar_data_error(data)
            if not isinstance(data, (ABCSeries, list, tuple)):
                # some other iterable -> materialise it
                data = list(data)
            data = np.asarray(data, dtype=dtype)

        if issubclass(data.dtype.type, str):
            cls._string_data_error(data)

        needs_conversion = copy or not is_dtype_equal(data.dtype, cls._default_dtype)
        if needs_conversion:
            subarr = np.array(data, dtype=cls._default_dtype, copy=copy)
            # give subclasses the chance to reject a lossy cast
            cls._assert_safe_casting(data, subarr)
        else:
            subarr = data

        if subarr.ndim > 1:
            # GH#13601, GH#20285, GH#27125
            raise ValueError("Index data must be 1-dimensional")

        resolved_name = maybe_extract_name(name, data, cls)
        return cls._simple_new(subarr, name=resolved_name)

    @classmethod
    def _validate_dtype(cls, dtype: Dtype) -> None:
        """
        Raise ValueError unless ``dtype`` is None or passes the check
        registered for ``cls._typ``.
        """
        if dtype is None:
            return

        # _typ -> (predicate, description used in the error message)
        checks_by_typ = {
            "int64index": (is_signed_integer_dtype, "signed integer"),
            "uint64index": (is_unsigned_integer_dtype, "unsigned integer"),
            "float64index": (is_float_dtype, "float"),
            "rangeindex": (is_signed_integer_dtype, "signed integer"),
        }
        is_acceptable, expected = checks_by_typ[cls._typ]
        if is_acceptable(dtype):
            return
        raise ValueError(
            f"Incorrect `dtype` passed: expected {expected}, received {dtype}"
        )

    @Appender(_index_shared_docs["_maybe_cast_slice_bound"])
    def _maybe_cast_slice_bound(self, label, side, kind):
        assert kind in ("ix", "loc", "getitem", None)

        # the label is run through the same cast as any other indexer
        return self._maybe_cast_indexer(label)

    @Appender(_index_shared_docs["_shallow_copy"])
    def _shallow_copy(self, values=None, **kwargs):
        if values is None or self._can_hold_na:
            return super()._shallow_copy(values=values, **kwargs)
        # Infer the result type so that an index that cannot hold NA
        # (e.g. Int64Index) is not returned with float data.
        return self._shallow_copy_with_infer(values=values, **kwargs)

    def _convert_for_op(self, value):
        """
        Return ``value`` unchanged so it can be inserted into the ndarray.

        Raises
        ------
        TypeError
            If ``value`` is a bool or has bool dtype; raising is what forces
            the conversion to object so the bools are not lost.
        """
        if is_bool(value) or is_bool_dtype(value):
            raise TypeError

        return value

    def _convert_tolerance(self, tolerance, target):
        """
        Return ``tolerance`` as an ndarray after validating it.

        Raises
        ------
        ValueError
            If a list-like tolerance does not match the size of ``target``
            or the tolerance is not numeric.
        """
        tolerance = np.asarray(tolerance)
        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError("list-like tolerance size must match target index size")

        if np.issubdtype(tolerance.dtype, np.number):
            return tolerance

        if tolerance.ndim > 0:
            raise ValueError(
                f"tolerance argument for {type(self).__name__} must contain "
                "numeric elements if it is list type"
            )
        raise ValueError(
            f"tolerance argument for {type(self).__name__} must be numeric "
            f"if it is a scalar: {tolerance!r}"
        )

    @classmethod
    def _assert_safe_casting(cls, data, subarr):
        """
        Hook called after ``data`` has been cast to ``subarr``.

        The default does nothing. Subclasses override it when casting from an
        accepted dtype to the internal dtype can truncate (e.g. float to int).
        """

    def _concat_same_dtype(self, indexes, name):
        """
        Concatenate the values of ``indexes`` into one index of the type of
        the first element, renamed to ``name``.
        """
        klass = type(indexes[0])
        combined = np.concatenate([idx._values for idx in indexes])
        return klass(combined).rename(name)

    @property
    def is_all_dates(self) -> bool:
        """
        Whether all labels are datetime objects; always False here.
        """
        return False

    @Appender(Index.insert.__doc__)
    def insert(self, loc, item):
        # scalar NA values are replaced by this index's own NA value
        is_missing = is_scalar(item) and isna(item)
        return super().insert(loc, self._na_value if is_missing else item)

    def _union(self, other, sort):
        """
        Union with ``other``, casting both sides to float when exactly one of
        them is integer and the other float.
        """
        # Right now, union(int, float) is treated a bit specially.
        # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
        # union(int, float) may change to go to object.
        # float | [u]int -> float  (the special case)
        # <T>   | <T>    -> T
        # <T>   | <U>    -> object
        int_with_float = is_integer_dtype(self.dtype) and is_float_dtype(other.dtype)
        float_with_int = is_integer_dtype(other.dtype) and is_float_dtype(self.dtype)

        if not (int_with_float or float_with_int):
            return super()._union(other, sort)

        left = self.astype("float")
        right = other.astype("float")
        return left._union(right, sort)

185 

186 

# Class docstring template; the %(klass)s, %(ltype)s, %(extra)s and %(dtype)s
# fields are filled in with the ``%`` operator.
_num_index_shared_docs["class_descr"] = """
    Immutable ndarray implementing an ordered, sliceable set. The basic object
    storing axis labels for all pandas objects. %(klass)s is a special case
    of `Index` with purely %(ltype)s labels. %(extra)s.

    Parameters
    ----------
    data : array-like (1-dimensional)
    dtype : NumPy dtype (default: %(dtype)s)
    copy : bool
        Make a copy of input ndarray.
    name : object
        Name to be stored in the index.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    Index : The base pandas Index type.

    Notes
    -----
    An Index instance can **only** contain hashable objects.
"""

219 

220_int64_descr_args = dict(klass="Int64Index", ltype="integer", dtype="int64", extra="") 

221 

222 

class IntegerIndex(NumericIndex):
    """
    Abstract parent class of Int64Index and UInt64Index.
    """

    def __contains__(self, key) -> bool:
        """
        Return whether ``key`` is found in the index engine.

        A float key with a fractional part is never contained. Lookups that
        fail with OverflowError, TypeError or ValueError count as "not found".
        """
        # Done outside the try block so an unhashable key still raises.
        hash(key)
        try:
            has_fraction = is_float(key) and int(key) != key
            return False if has_fraction else key in self._engine
        except (OverflowError, TypeError, ValueError):
            return False

239 

240 

class Int64Index(IntegerIndex):
    __doc__ = _num_index_shared_docs["class_descr"] % _int64_descr_args

    _typ = "int64index"
    _can_hold_na = False
    _engine_type = libindex.Int64Engine
    _default_dtype = np.int64

    @property
    def inferred_type(self) -> str:
        """
        Always 'integer' for ``Int64Index``
        """
        return "integer"

    @property
    def asi8(self) -> np.ndarray:
        """
        Return the index values viewed as ``"i8"``.
        """
        # do not cache or you'll create a memory leak
        return self.values.view("i8")

    @Appender(_index_shared_docs["_convert_scalar_indexer"])
    def _convert_scalar_indexer(self, key, kind=None):
        assert kind in ["ix", "loc", "getitem", "iloc", None]

        # don't coerce ilocs to integers
        if kind != "iloc":
            key = self._maybe_cast_indexer(key)
        return super()._convert_scalar_indexer(key, kind=kind)

    def _wrap_joined_index(self, joined, other):
        """
        Wrap the joined values in an Int64Index named via
        ``get_op_result_name(self, other)``.
        """
        name = get_op_result_name(self, other)
        return Int64Index(joined, name=name)

    @classmethod
    def _assert_safe_casting(cls, data, subarr):
        """
        Ensure incoming data can be represented as ints.

        Raises
        ------
        TypeError
            If ``data`` is not of a signed-integer dtype and the cast result
            ``subarr`` does not compare equal to it.
        """
        if not issubclass(data.dtype.type, np.signedinteger):
            if not np.array_equal(data, subarr):
                raise TypeError("Unsafe NumPy casting, you must explicitly cast")

    def _is_compatible_with_other(self, other):
        """
        Return True if the parent class accepts ``other``, or if both ``self``
        and ``other`` are Int64/Float64/Range indexes.
        """
        # Test the objects themselves: the previous ``isinstance(type(obj), ...)``
        # checked the class object instead of the instance.
        return super()._is_compatible_with_other(other) or all(
            isinstance(obj, (ABCInt64Index, ABCFloat64Index, ABCRangeIndex))
            for obj in [self, other]
        )

288 

289 

# Class-level setup executed at import time. Neither helper is defined on a
# class in this module, so both are inherited; presumably they attach the
# arithmetic and logical operator methods to Int64Index -- confirm in the base.
Int64Index._add_numeric_methods()
Int64Index._add_logical_methods()

292 

293_uint64_descr_args = dict( 

294 klass="UInt64Index", ltype="unsigned integer", dtype="uint64", extra="" 

295) 

296 

297 

class UInt64Index(IntegerIndex):
    __doc__ = _num_index_shared_docs["class_descr"] % _uint64_descr_args

    _typ = "uint64index"
    _can_hold_na = False
    _engine_type = libindex.UInt64Engine
    _default_dtype = np.uint64

    @property
    def inferred_type(self) -> str:
        """
        Always 'integer' for ``UInt64Index``
        """
        return "integer"

    @property
    def asi8(self) -> np.ndarray:
        """
        Return the index values viewed as ``"u8"``.
        """
        # do not cache or you'll create a memory leak
        return self.values.view("u8")

    @Appender(_index_shared_docs["_convert_scalar_indexer"])
    def _convert_scalar_indexer(self, key, kind=None):
        assert kind in ["ix", "loc", "getitem", "iloc", None]

        # don't coerce ilocs to integers
        if kind != "iloc":
            key = self._maybe_cast_indexer(key)
        return super()._convert_scalar_indexer(key, kind=kind)

    @Appender(_index_shared_docs["_convert_arr_indexer"])
    def _convert_arr_indexer(self, keyarr):
        # Cast the indexer to uint64 if possible so that the values returned
        # from indexing are also uint64.
        dtype = None
        if is_integer_dtype(keyarr) or (
            lib.infer_dtype(keyarr, skipna=False) == "integer"
        ):
            dtype = np.uint64

        return com.asarray_tuplesafe(keyarr, dtype=dtype)

    @Appender(_index_shared_docs["_convert_index_indexer"])
    def _convert_index_indexer(self, keyarr):
        # Cast the indexer to uint64 if possible so
        # that the values returned from indexing are
        # also uint64.
        if keyarr.is_integer():
            return keyarr.astype(np.uint64)
        return keyarr

    def _wrap_joined_index(self, joined, other):
        """
        Wrap the joined values in a UInt64Index named via
        ``get_op_result_name(self, other)``.
        """
        name = get_op_result_name(self, other)
        return UInt64Index(joined, name=name)

    @classmethod
    def _assert_safe_casting(cls, data, subarr):
        """
        Ensure incoming data can be represented as uints.

        Raises
        ------
        TypeError
            If ``data`` is not of an unsigned-integer dtype and the cast
            result ``subarr`` does not compare equal to it.
        """
        if not issubclass(data.dtype.type, np.unsignedinteger):
            if not np.array_equal(data, subarr):
                raise TypeError("Unsafe NumPy casting, you must explicitly cast")

    def _is_compatible_with_other(self, other):
        """
        Return True if the parent class accepts ``other``, or if both ``self``
        and ``other`` are UInt64/Float64 indexes.
        """
        # Test the objects themselves: the previous ``isinstance(type(obj), ...)``
        # checked the class object instead of the instance.
        return super()._is_compatible_with_other(other) or all(
            isinstance(obj, (ABCUInt64Index, ABCFloat64Index))
            for obj in [self, other]
        )

366 

367 

# Class-level setup executed at import time. Neither helper is defined on a
# class in this module, so both are inherited; presumably they attach the
# arithmetic and logical operator methods to UInt64Index -- confirm in the base.
UInt64Index._add_numeric_methods()
UInt64Index._add_logical_methods()

370 

371_float64_descr_args = dict( 

372 klass="Float64Index", dtype="float64", ltype="float", extra="" 

373) 

374 

375 

class Float64Index(NumericIndex):
    __doc__ = _num_index_shared_docs["class_descr"] % _float64_descr_args

    _typ = "float64index"
    _engine_type = libindex.Float64Engine
    _default_dtype = np.float64

    @property
    def inferred_type(self) -> str:
        """
        Always 'floating' for ``Float64Index``
        """
        return "floating"

    @Appender(_index_shared_docs["astype"])
    def astype(self, dtype, copy=True):
        dtype = pandas_dtype(dtype)
        if needs_i8_conversion(dtype):
            raise TypeError(
                f"Cannot convert Float64Index to dtype {dtype}; integer "
                "values are required for conversion"
            )
        elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype):
            # TODO(jreback); this can change once we have an EA Index type
            # GH 13149
            arr = astype_nansafe(self.values, dtype=dtype)
            # Carry the name over explicitly: ``arr`` is a bare ndarray, so
            # the constructor cannot recover it and it was silently dropped.
            return Int64Index(arr, name=self.name)
        return super().astype(dtype, copy=copy)

    @Appender(_index_shared_docs["_convert_scalar_indexer"])
    def _convert_scalar_indexer(self, key, kind=None):
        assert kind in ["ix", "loc", "getitem", "iloc", None]

        if kind == "iloc":
            return self._validate_indexer("positional", key, kind)

        # any other kind: the key is passed through unchanged
        return key

    @Appender(_index_shared_docs["_convert_slice_indexer"])
    def _convert_slice_indexer(self, key, kind=None):
        # if we are not a slice, then we are done
        if not isinstance(key, slice):
            return key

        if kind == "iloc":
            return super()._convert_slice_indexer(key, kind=kind)

        # translate to locations
        return self.slice_indexer(key.start, key.stop, key.step, kind=kind)

    def _format_native_types(
        self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs
    ):
        """
        Format the values with ``FloatArrayFormatter`` (``fixed_width=False``)
        and return its array result. Extra ``kwargs`` are accepted but unused.
        """
        from pandas.io.formats.format import FloatArrayFormatter

        formatter = FloatArrayFormatter(
            self.values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        )
        return formatter.get_result_as_array()

    def get_value(self, series, key):
        """
        Return the entry of ``series`` at the position where ``key`` is found
        in this index.

        The key is always looked up as a label via ``get_loc``.

        Raises
        ------
        InvalidIndexError
            If ``key`` is not a scalar.
        """
        if not is_scalar(key):
            raise InvalidIndexError

        k = com.values_from_object(key)
        loc = self.get_loc(k)
        new_values = com.values_from_object(series)[loc]

        return new_values

    def equals(self, other) -> bool:
        """
        Determines if two Index objects contain the same elements.
        """
        if self is other:
            return True

        if not isinstance(other, Index):
            return False

        # need to compare nans locations and make sure that they are the same
        # since nans don't compare equal this is a bit tricky
        try:
            if not isinstance(other, Float64Index):
                other = self._constructor(other)
            if not is_dtype_equal(self.dtype, other.dtype) or self.shape != other.shape:
                return False
            left, right = self._ndarray_values, other._ndarray_values
            return ((left == right) | (self._isnan & other._isnan)).all()
        except (TypeError, ValueError):
            return False

    def __contains__(self, other) -> bool:
        """
        Return whether ``other`` is in the index.

        NaN is contained when the index has NaNs. A sequence of at most one
        element is tested by that element.
        """
        if super().__contains__(other):
            return True

        try:
            # if other is a sequence this throws a ValueError
            return np.isnan(other) and self.hasnans
        except ValueError:
            try:
                return len(other) <= 1 and other.item() in self
            except AttributeError:
                return len(other) <= 1 and other in self
            except TypeError:
                pass
        except TypeError:
            pass

        return False

    @Appender(_index_shared_docs["get_loc"])
    def get_loc(self, key, method=None, tolerance=None):
        if is_bool(key):
            # A bool is never a label here. It used to share the NaN branch
            # below, so True/False resolved to the NaN position(s) whenever
            # the index contained NaN.
            raise KeyError(key)

        try:
            if np.all(np.isnan(key)):
                nan_idxs = self._nan_idxs
                try:
                    # exactly one NaN -> scalar position
                    return nan_idxs.item()
                except ValueError:
                    if not len(nan_idxs):
                        raise KeyError(key)
                    # several NaNs -> all of their positions
                    return nan_idxs
        except (TypeError, NotImplementedError):
            # key cannot be tested for NaN; use the regular lookup
            pass
        return super().get_loc(key, method=method, tolerance=tolerance)

    @cache_readonly
    def is_unique(self) -> bool:
        """
        True if the parent class reports unique values and there are fewer
        than two NaNs.
        """
        return super().is_unique and self._nan_idxs.size < 2

    @Appender(Index.isin.__doc__)
    def isin(self, values, level=None):
        if level is not None:
            self._validate_index_level(level)
        return algorithms.isin(np.array(self), values)

    def _is_compatible_with_other(self, other):
        """
        Return True if the parent class accepts ``other``, or if both ``self``
        and ``other`` are Int64/Float64/UInt64/Range indexes.
        """
        # Test the objects themselves: the previous ``isinstance(type(obj), ...)``
        # checked the class object instead of the instance.
        return super()._is_compatible_with_other(other) or all(
            isinstance(
                obj,
                (ABCInt64Index, ABCFloat64Index, ABCUInt64Index, ABCRangeIndex),
            )
            for obj in [self, other]
        )

528 

529 

# Class-level setup executed at import time. Neither helper is defined on a
# class in this module, so both are inherited; presumably the first attaches
# arithmetic operator methods and the second installs logical methods in a
# disabled form for Float64Index -- confirm in the base class.
Float64Index._add_numeric_methods()
Float64Index._add_logical_methods_disabled()