Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import collections 

2from datetime import timedelta 

3import functools 

4import gc 

5import json 

6import operator 

7import pickle 

8import re 

9from textwrap import dedent 

10from typing import ( 

11 Any, 

12 Callable, 

13 Dict, 

14 FrozenSet, 

15 Hashable, 

16 List, 

17 Mapping, 

18 Optional, 

19 Sequence, 

20 Set, 

21 Tuple, 

22 Type, 

23 Union, 

24) 

25import warnings 

26import weakref 

27 

28import numpy as np 

29 

30from pandas._config import config 

31 

32from pandas._libs import Timestamp, iNaT, lib, properties 

33from pandas._typing import ( 

34 Axis, 

35 Dtype, 

36 FilePathOrBuffer, 

37 FrameOrSeries, 

38 JSONSerializable, 

39 Level, 

40 Renamer, 

41) 

42from pandas.compat import set_function_name 

43from pandas.compat._optional import import_optional_dependency 

44from pandas.compat.numpy import function as nv 

45from pandas.errors import AbstractMethodError 

46from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature 

47from pandas.util._validators import ( 

48 validate_bool_kwarg, 

49 validate_fillna_kwargs, 

50 validate_percentile, 

51) 

52 

53from pandas.core.dtypes.common import ( 

54 ensure_int64, 

55 ensure_object, 

56 ensure_str, 

57 is_bool, 

58 is_bool_dtype, 

59 is_datetime64_any_dtype, 

60 is_datetime64tz_dtype, 

61 is_dict_like, 

62 is_extension_array_dtype, 

63 is_float, 

64 is_integer, 

65 is_list_like, 

66 is_number, 

67 is_numeric_dtype, 

68 is_object_dtype, 

69 is_period_arraylike, 

70 is_re_compilable, 

71 is_scalar, 

72 is_timedelta64_dtype, 

73 pandas_dtype, 

74) 

75from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries 

76from pandas.core.dtypes.inference import is_hashable 

77from pandas.core.dtypes.missing import isna, notna 

78 

79import pandas as pd 

80from pandas.core import missing, nanops 

81import pandas.core.algorithms as algos 

82from pandas.core.base import PandasObject, SelectionMixin 

83import pandas.core.common as com 

84from pandas.core.construction import create_series_with_explicit_dtype 

85from pandas.core.indexes.api import ( 

86 Index, 

87 InvalidIndexError, 

88 MultiIndex, 

89 RangeIndex, 

90 ensure_index, 

91) 

92from pandas.core.indexes.datetimes import DatetimeIndex 

93from pandas.core.indexes.period import Period, PeriodIndex 

94import pandas.core.indexing as indexing 

95from pandas.core.internals import BlockManager 

96from pandas.core.missing import find_valid_index 

97from pandas.core.ops import _align_method_FRAME 

98 

99from pandas.io.formats import format as fmt 

100from pandas.io.formats.format import DataFrameFormatter, format_percentiles 

101from pandas.io.formats.printing import pprint_thing 

102from pandas.tseries.frequencies import to_offset 

103 

104# goal is to be able to define the docs close to function, while still being 

105# able to share 

106_shared_docs: Dict[str, str] = dict() 

107_shared_doc_kwargs = dict( 

108 axes="keywords for axes", 

109 klass="Series/DataFrame", 

110 axes_single_arg="int or labels for object", 

111 args_transpose="axes to permute (int or label for object)", 

112 optional_by=""" 

113 by : str or list of str 

114 Name or list of names to sort by""", 

115) 

116 

117 

118def _single_replace(self, to_replace, method, inplace, limit): 

119 """ 

120 Replaces values in a Series using the fill method specified when no 

121 replacement value is given in the replace method 

122 """ 

123 if self.ndim != 1: 

124 raise TypeError( 

125 f"cannot replace {to_replace} with method {method} on a " 

126 f"{type(self).__name__}" 

127 ) 

128 

129 orig_dtype = self.dtype 

130 result = self if inplace else self.copy() 

131 fill_f = missing.get_fill_func(method) 

132 

133 mask = missing.mask_missing(result.values, to_replace) 

134 values = fill_f(result.values, limit=limit, mask=mask) 

135 

136 if values.dtype == orig_dtype and inplace: 

137 return 

138 

139 result = pd.Series(values, index=self.index, dtype=self.dtype).__finalize__(self) 

140 

141 if inplace: 

142 self._update_inplace(result._data) 

143 return 

144 

145 return result 

146 

147 

148bool_t = bool # Need alias because NDFrame has def bool: 

149 

150 

151class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin): 

152 """ 

153 N-dimensional analogue of DataFrame. Store multi-dimensional in a 

154 size-mutable, labeled data structure 

155 

156 Parameters 

157 ---------- 

158 data : BlockManager 

159 axes : list 

160 copy : bool, default False 

161 """ 

162 

163 _internal_names: List[str] = [ 

164 "_data", 

165 "_cacher", 

166 "_item_cache", 

167 "_cache", 

168 "_is_copy", 

169 "_subtyp", 

170 "_name", 

171 "_index", 

172 "_default_kind", 

173 "_default_fill_value", 

174 "_metadata", 

175 "__array_struct__", 

176 "__array_interface__", 

177 ] 

178 _internal_names_set: Set[str] = set(_internal_names) 

179 _accessors: Set[str] = set() 

180 _deprecations: FrozenSet[str] = frozenset(["get_values", "ix"]) 

181 _metadata: List[str] = [] 

182 _is_copy = None 

183 _data: BlockManager 

184 _attrs: Dict[Optional[Hashable], Any] 

185 _typ: str 

186 

187 # ---------------------------------------------------------------------- 

188 # Constructors 

189 

190 def __init__( 

191 self, 

192 data: BlockManager, 

193 axes: Optional[List[Index]] = None, 

194 copy: bool = False, 

195 dtype: Optional[Dtype] = None, 

196 attrs: Optional[Mapping[Optional[Hashable], Any]] = None, 

197 fastpath: bool = False, 

198 ): 

199 

200 if not fastpath: 

201 if dtype is not None: 

202 data = data.astype(dtype) 

203 elif copy: 

204 data = data.copy() 

205 

206 if axes is not None: 

207 for i, ax in enumerate(axes): 

208 data = data.reindex_axis(ax, axis=i) 

209 

210 object.__setattr__(self, "_is_copy", None) 

211 object.__setattr__(self, "_data", data) 

212 object.__setattr__(self, "_item_cache", {}) 

213 if attrs is None: 

214 attrs = {} 

215 else: 

216 attrs = dict(attrs) 

217 object.__setattr__(self, "_attrs", attrs) 

218 

219 def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): 

220 """ passed a manager and a axes dict """ 

221 for a, axe in axes.items(): 

222 if axe is not None: 

223 mgr = mgr.reindex_axis( 

224 axe, axis=self._get_block_manager_axis(a), copy=False 

225 ) 

226 

227 # make a copy if explicitly requested 

228 if copy: 

229 mgr = mgr.copy() 

230 if dtype is not None: 

231 # avoid further copies if we can 

232 if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype: 

233 mgr = mgr.astype(dtype=dtype) 

234 return mgr 

235 

236 # ---------------------------------------------------------------------- 

237 

238 @property 

239 def attrs(self) -> Dict[Optional[Hashable], Any]: 

240 """ 

241 Dictionary of global attributes on this object. 

242 

243 .. warning:: 

244 

245 attrs is experimental and may change without warning. 

246 """ 

247 if self._attrs is None: 

248 self._attrs = {} 

249 return self._attrs 

250 

251 @attrs.setter 

252 def attrs(self, value: Mapping[Optional[Hashable], Any]) -> None: 

253 self._attrs = dict(value) 

254 

255 def _validate_dtype(self, dtype): 

256 """ validate the passed dtype """ 

257 

258 if dtype is not None: 

259 dtype = pandas_dtype(dtype) 

260 

261 # a compound dtype 

262 if dtype.kind == "V": 

263 raise NotImplementedError( 

264 "compound dtypes are not implemented" 

265 f" in the {type(self).__name__} constructor" 

266 ) 

267 

268 return dtype 

269 

270 # ---------------------------------------------------------------------- 

271 # Construction 

272 

273 @property 

274 def _constructor(self: FrameOrSeries) -> Type[FrameOrSeries]: 

275 """Used when a manipulation result has the same dimensions as the 

276 original. 

277 """ 

278 raise AbstractMethodError(self) 

279 

280 @property 

281 def _constructor_sliced(self): 

282 """Used when a manipulation result has one lower dimension(s) as the 

283 original, such as DataFrame single columns slicing. 

284 """ 

285 raise AbstractMethodError(self) 

286 

287 @property 

288 def _constructor_expanddim(self): 

289 """Used when a manipulation result has one higher dimension as the 

290 original, such as Series.to_frame() 

291 """ 

292 raise NotImplementedError 

293 

294 # ---------------------------------------------------------------------- 

295 # Axis 

296 _AXIS_ALIASES = {"rows": 0} 

297 _AXIS_IALIASES = {0: "rows"} 

298 _stat_axis_number = 0 

299 _stat_axis_name = "index" 

300 _ix = None 

301 _AXIS_ORDERS: List[str] 

302 _AXIS_NUMBERS: Dict[str, int] 

303 _AXIS_NAMES: Dict[int, str] 

304 _AXIS_REVERSED: bool 

305 _info_axis_number: int 

306 _info_axis_name: str 

307 _AXIS_LEN: int 

308 

309 @classmethod 

310 def _setup_axes(cls, axes: List[str], docs: Dict[str, str]) -> None: 

311 """ 

312 Provide axes setup for the major PandasObjects. 

313 

314 Parameters 

315 ---------- 

316 axes : the names of the axes in order (lowest to highest) 

317 docs : docstrings for the axis properties 

318 """ 

319 info_axis = len(axes) - 1 

320 axes_are_reversed = len(axes) > 1 

321 

322 cls._AXIS_ORDERS = axes 

323 cls._AXIS_NUMBERS = {a: i for i, a in enumerate(axes)} 

324 cls._AXIS_LEN = len(axes) 

325 cls._AXIS_NAMES = dict(enumerate(axes)) 

326 cls._AXIS_REVERSED = axes_are_reversed 

327 

328 cls._info_axis_number = info_axis 

329 cls._info_axis_name = axes[info_axis] 

330 

331 # setup the actual axis 

332 def set_axis(a, i): 

333 setattr(cls, a, properties.AxisProperty(i, docs.get(a, a))) 

334 cls._internal_names_set.add(a) 

335 

336 if axes_are_reversed: 

337 for i, a in cls._AXIS_NAMES.items(): 

338 set_axis(a, 1 - i) 

339 else: 

340 for i, a in cls._AXIS_NAMES.items(): 

341 set_axis(a, i) 

342 

343 def _construct_axes_dict(self, axes=None, **kwargs): 

344 """Return an axes dictionary for myself.""" 

345 d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)} 

346 d.update(kwargs) 

347 return d 

348 

349 @staticmethod 

350 def _construct_axes_dict_from(self, axes, **kwargs): 

351 """Return an axes dictionary for the passed axes.""" 

352 d = {a: ax for a, ax in zip(self._AXIS_ORDERS, axes)} 

353 d.update(kwargs) 

354 return d 

355 

356 def _construct_axes_from_arguments( 

357 self, args, kwargs, require_all: bool = False, sentinel=None 

358 ): 

359 """Construct and returns axes if supplied in args/kwargs. 

360 

361 If require_all, raise if all axis arguments are not supplied 

362 return a tuple of (axes, kwargs). 

363 

364 sentinel specifies the default parameter when an axis is not 

365 supplied; useful to distinguish when a user explicitly passes None 

366 in scenarios where None has special meaning. 

367 """ 

368 

369 # construct the args 

370 args = list(args) 

371 for a in self._AXIS_ORDERS: 

372 

373 # look for a argument by position 

374 if a not in kwargs: 

375 try: 

376 kwargs[a] = args.pop(0) 

377 except IndexError: 

378 if require_all: 

379 raise TypeError("not enough/duplicate arguments specified!") 

380 

381 axes = {a: kwargs.pop(a, sentinel) for a in self._AXIS_ORDERS} 

382 return axes, kwargs 

383 

384 @classmethod 

385 def _from_axes(cls: Type[FrameOrSeries], data, axes, **kwargs) -> FrameOrSeries: 

386 # for construction from BlockManager 

387 if isinstance(data, BlockManager): 

388 return cls(data, **kwargs) 

389 else: 

390 if cls._AXIS_REVERSED: 

391 axes = axes[::-1] 

392 d = cls._construct_axes_dict_from(cls, axes, copy=False) 

393 d.update(kwargs) 

394 return cls(data, **d) 

395 

396 @classmethod 

397 def _get_axis_number(cls, axis): 

398 axis = cls._AXIS_ALIASES.get(axis, axis) 

399 if is_integer(axis): 

400 if axis in cls._AXIS_NAMES: 

401 return axis 

402 else: 

403 try: 

404 return cls._AXIS_NUMBERS[axis] 

405 except KeyError: 

406 pass 

407 raise ValueError(f"No axis named {axis} for object type {cls}") 

408 

409 @classmethod 

410 def _get_axis_name(cls, axis): 

411 axis = cls._AXIS_ALIASES.get(axis, axis) 

412 if isinstance(axis, str): 

413 if axis in cls._AXIS_NUMBERS: 

414 return axis 

415 else: 

416 try: 

417 return cls._AXIS_NAMES[axis] 

418 except KeyError: 

419 pass 

420 raise ValueError(f"No axis named {axis} for object type {cls}") 

421 

422 def _get_axis(self, axis): 

423 name = self._get_axis_name(axis) 

424 return getattr(self, name) 

425 

426 @classmethod 

427 def _get_block_manager_axis(cls, axis): 

428 """Map the axis to the block_manager axis.""" 

429 axis = cls._get_axis_number(axis) 

430 if cls._AXIS_REVERSED: 

431 m = cls._AXIS_LEN - 1 

432 return m - axis 

433 return axis 

434 

435 def _get_axis_resolvers(self, axis: str) -> Dict[str, ABCSeries]: 

436 # index or columns 

437 axis_index = getattr(self, axis) 

438 d = dict() 

439 prefix = axis[0] 

440 

441 for i, name in enumerate(axis_index.names): 

442 if name is not None: 

443 key = level = name 

444 else: 

445 # prefix with 'i' or 'c' depending on the input axis 

446 # e.g., you must do ilevel_0 for the 0th level of an unnamed 

447 # multiiindex 

448 key = f"{prefix}level_{i}" 

449 level = i 

450 

451 level_values = axis_index.get_level_values(level) 

452 s = level_values.to_series() 

453 s.index = axis_index 

454 d[key] = s 

455 

456 # put the index/columns itself in the dict 

457 if isinstance(axis_index, MultiIndex): 

458 dindex = axis_index 

459 else: 

460 dindex = axis_index.to_series() 

461 

462 d[axis] = dindex 

463 return d 

464 

465 def _get_index_resolvers(self) -> Dict[str, ABCSeries]: 

466 from pandas.core.computation.parsing import clean_column_name 

467 

468 d: Dict[str, ABCSeries] = {} 

469 for axis_name in self._AXIS_ORDERS: 

470 d.update(self._get_axis_resolvers(axis_name)) 

471 

472 return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)} 

473 

474 def _get_cleaned_column_resolvers(self) -> Dict[str, ABCSeries]: 

475 """ 

476 Return the special character free column resolvers of a dataframe. 

477 

478 Column names with special characters are 'cleaned up' so that they can 

479 be referred to by backtick quoting. 

480 Used in :meth:`DataFrame.eval`. 

481 """ 

482 from pandas.core.computation.parsing import clean_column_name 

483 

484 if isinstance(self, ABCSeries): 

485 return {clean_column_name(self.name): self} 

486 

487 return { 

488 clean_column_name(k): v for k, v in self.items() if not isinstance(k, int) 

489 } 

490 

491 @property 

492 def _info_axis(self): 

493 return getattr(self, self._info_axis_name) 

494 

495 @property 

496 def _stat_axis(self): 

497 return getattr(self, self._stat_axis_name) 

498 

499 @property 

500 def shape(self) -> Tuple[int, ...]: 

501 """ 

502 Return a tuple of axis dimensions 

503 """ 

504 return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS) 

505 

506 @property 

507 def axes(self) -> List[Index]: 

508 """ 

509 Return index label(s) of the internal NDFrame 

510 """ 

511 # we do it this way because if we have reversed axes, then 

512 # the block manager shows then reversed 

513 return [self._get_axis(a) for a in self._AXIS_ORDERS] 

514 

515 @property 

516 def ndim(self) -> int: 

517 """ 

518 Return an int representing the number of axes / array dimensions. 

519 

520 Return 1 if Series. Otherwise return 2 if DataFrame. 

521 

522 See Also 

523 -------- 

524 ndarray.ndim : Number of array dimensions. 

525 

526 Examples 

527 -------- 

528 >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) 

529 >>> s.ndim 

530 1 

531 

532 >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) 

533 >>> df.ndim 

534 2 

535 """ 

536 return self._data.ndim 

537 

538 @property 

539 def size(self): 

540 """ 

541 Return an int representing the number of elements in this object. 

542 

543 Return the number of rows if Series. Otherwise return the number of 

544 rows times number of columns if DataFrame. 

545 

546 See Also 

547 -------- 

548 ndarray.size : Number of elements in the array. 

549 

550 Examples 

551 -------- 

552 >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) 

553 >>> s.size 

554 3 

555 

556 >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) 

557 >>> df.size 

558 4 

559 """ 

560 return np.prod(self.shape) 

561 

562 @property 

563 def _selected_obj(self: FrameOrSeries) -> FrameOrSeries: 

564 """ internal compat with SelectionMixin """ 

565 return self 

566 

567 @property 

568 def _obj_with_exclusions(self: FrameOrSeries) -> FrameOrSeries: 

569 """ internal compat with SelectionMixin """ 

570 return self 

571 

572 def set_axis(self, labels, axis=0, inplace=False): 

573 """ 

574 Assign desired index to given axis. 

575 

576 Indexes for column or row labels can be changed by assigning 

577 a list-like or Index. 

578 

579 .. versionchanged:: 0.21.0 

580 

581 The signature is now `labels` and `axis`, consistent with 

582 the rest of pandas API. Previously, the `axis` and `labels` 

583 arguments were respectively the first and second positional 

584 arguments. 

585 

586 Parameters 

587 ---------- 

588 labels : list-like, Index 

589 The values for the new index. 

590 

591 axis : {0 or 'index', 1 or 'columns'}, default 0 

592 The axis to update. The value 0 identifies the rows, and 1 

593 identifies the columns. 

594 

595 inplace : bool, default False 

596 Whether to return a new %(klass)s instance. 

597 

598 Returns 

599 ------- 

600 renamed : %(klass)s or None 

601 An object of same type as caller if inplace=False, None otherwise. 

602 

603 See Also 

604 -------- 

605 DataFrame.rename_axis : Alter the name of the index or columns. 

606 

607 Examples 

608 -------- 

609 **Series** 

610 

611 >>> s = pd.Series([1, 2, 3]) 

612 >>> s 

613 0 1 

614 1 2 

615 2 3 

616 dtype: int64 

617 

618 >>> s.set_axis(['a', 'b', 'c'], axis=0) 

619 a 1 

620 b 2 

621 c 3 

622 dtype: int64 

623 

624 **DataFrame** 

625 

626 >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) 

627 

628 Change the row labels. 

629 

630 >>> df.set_axis(['a', 'b', 'c'], axis='index') 

631 A B 

632 a 1 4 

633 b 2 5 

634 c 3 6 

635 

636 Change the column labels. 

637 

638 >>> df.set_axis(['I', 'II'], axis='columns') 

639 I II 

640 0 1 4 

641 1 2 5 

642 2 3 6 

643 

644 Now, update the labels inplace. 

645 

646 >>> df.set_axis(['i', 'ii'], axis='columns', inplace=True) 

647 >>> df 

648 i ii 

649 0 1 4 

650 1 2 5 

651 2 3 6 

652 """ 

653 if inplace: 

654 setattr(self, self._get_axis_name(axis), labels) 

655 else: 

656 obj = self.copy() 

657 obj.set_axis(labels, axis=axis, inplace=True) 

658 return obj 

659 

660 def _set_axis(self, axis, labels) -> None: 

661 self._data.set_axis(axis, labels) 

662 self._clear_item_cache() 

663 

664 def swapaxes(self: FrameOrSeries, axis1, axis2, copy=True) -> FrameOrSeries: 

665 """ 

666 Interchange axes and swap values axes appropriately. 

667 

668 Returns 

669 ------- 

670 y : same as input 

671 """ 

672 i = self._get_axis_number(axis1) 

673 j = self._get_axis_number(axis2) 

674 

675 if i == j: 

676 if copy: 

677 return self.copy() 

678 return self 

679 

680 mapping = {i: j, j: i} 

681 

682 new_axes = (self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)) 

683 new_values = self.values.swapaxes(i, j) 

684 if copy: 

685 new_values = new_values.copy() 

686 

687 return self._constructor(new_values, *new_axes).__finalize__(self) 

688 

689 def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: 

690 """ 

691 Return DataFrame with requested index / column level(s) removed. 

692 

693 .. versionadded:: 0.24.0 

694 

695 Parameters 

696 ---------- 

697 level : int, str, or list-like 

698 If a string is given, must be the name of a level 

699 If list-like, elements must be names or positional indexes 

700 of levels. 

701 

702 axis : {0 or 'index', 1 or 'columns'}, default 0 

703 

704 Returns 

705 ------- 

706 DataFrame 

707 DataFrame with requested index / column level(s) removed. 

708 

709 Examples 

710 -------- 

711 >>> df = pd.DataFrame([ 

712 ... [1, 2, 3, 4], 

713 ... [5, 6, 7, 8], 

714 ... [9, 10, 11, 12] 

715 ... ]).set_index([0, 1]).rename_axis(['a', 'b']) 

716 

717 >>> df.columns = pd.MultiIndex.from_tuples([ 

718 ... ('c', 'e'), ('d', 'f') 

719 ... ], names=['level_1', 'level_2']) 

720 

721 >>> df 

722 level_1 c d 

723 level_2 e f 

724 a b 

725 1 2 3 4 

726 5 6 7 8 

727 9 10 11 12 

728 

729 >>> df.droplevel('a') 

730 level_1 c d 

731 level_2 e f 

732 b 

733 2 3 4 

734 6 7 8 

735 10 11 12 

736 

737 >>> df.droplevel('level2', axis=1) 

738 level_1 c d 

739 a b 

740 1 2 3 4 

741 5 6 7 8 

742 9 10 11 12 

743 """ 

744 labels = self._get_axis(axis) 

745 new_labels = labels.droplevel(level) 

746 result = self.set_axis(new_labels, axis=axis, inplace=False) 

747 return result 

748 

749 def pop(self: FrameOrSeries, item) -> FrameOrSeries: 

750 """ 

751 Return item and drop from frame. Raise KeyError if not found. 

752 

753 Parameters 

754 ---------- 

755 item : str 

756 Label of column to be popped. 

757 

758 Returns 

759 ------- 

760 Series 

761 

762 Examples 

763 -------- 

764 >>> df = pd.DataFrame([('falcon', 'bird', 389.0), 

765 ... ('parrot', 'bird', 24.0), 

766 ... ('lion', 'mammal', 80.5), 

767 ... ('monkey', 'mammal', np.nan)], 

768 ... columns=('name', 'class', 'max_speed')) 

769 >>> df 

770 name class max_speed 

771 0 falcon bird 389.0 

772 1 parrot bird 24.0 

773 2 lion mammal 80.5 

774 3 monkey mammal NaN 

775 

776 >>> df.pop('class') 

777 0 bird 

778 1 bird 

779 2 mammal 

780 3 mammal 

781 Name: class, dtype: object 

782 

783 >>> df 

784 name max_speed 

785 0 falcon 389.0 

786 1 parrot 24.0 

787 2 lion 80.5 

788 3 monkey NaN 

789 """ 

790 result = self[item] 

791 del self[item] 

792 try: 

793 result._reset_cacher() 

794 except AttributeError: 

795 pass 

796 

797 return result 

798 

799 def squeeze(self, axis=None): 

800 """ 

801 Squeeze 1 dimensional axis objects into scalars. 

802 

803 Series or DataFrames with a single element are squeezed to a scalar. 

804 DataFrames with a single column or a single row are squeezed to a 

805 Series. Otherwise the object is unchanged. 

806 

807 This method is most useful when you don't know if your 

808 object is a Series or DataFrame, but you do know it has just a single 

809 column. In that case you can safely call `squeeze` to ensure you have a 

810 Series. 

811 

812 Parameters 

813 ---------- 

814 axis : {0 or 'index', 1 or 'columns', None}, default None 

815 A specific axis to squeeze. By default, all length-1 axes are 

816 squeezed. 

817 

818 Returns 

819 ------- 

820 DataFrame, Series, or scalar 

821 The projection after squeezing `axis` or all the axes. 

822 

823 See Also 

824 -------- 

825 Series.iloc : Integer-location based indexing for selecting scalars. 

826 DataFrame.iloc : Integer-location based indexing for selecting Series. 

827 Series.to_frame : Inverse of DataFrame.squeeze for a 

828 single-column DataFrame. 

829 

830 Examples 

831 -------- 

832 >>> primes = pd.Series([2, 3, 5, 7]) 

833 

834 Slicing might produce a Series with a single value: 

835 

836 >>> even_primes = primes[primes % 2 == 0] 

837 >>> even_primes 

838 0 2 

839 dtype: int64 

840 

841 >>> even_primes.squeeze() 

842 2 

843 

844 Squeezing objects with more than one value in every axis does nothing: 

845 

846 >>> odd_primes = primes[primes % 2 == 1] 

847 >>> odd_primes 

848 1 3 

849 2 5 

850 3 7 

851 dtype: int64 

852 

853 >>> odd_primes.squeeze() 

854 1 3 

855 2 5 

856 3 7 

857 dtype: int64 

858 

859 Squeezing is even more effective when used with DataFrames. 

860 

861 >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b']) 

862 >>> df 

863 a b 

864 0 1 2 

865 1 3 4 

866 

867 Slicing a single column will produce a DataFrame with the columns 

868 having only one value: 

869 

870 >>> df_a = df[['a']] 

871 >>> df_a 

872 a 

873 0 1 

874 1 3 

875 

876 So the columns can be squeezed down, resulting in a Series: 

877 

878 >>> df_a.squeeze('columns') 

879 0 1 

880 1 3 

881 Name: a, dtype: int64 

882 

883 Slicing a single row from a single column will produce a single 

884 scalar DataFrame: 

885 

886 >>> df_0a = df.loc[df.index < 1, ['a']] 

887 >>> df_0a 

888 a 

889 0 1 

890 

891 Squeezing the rows produces a single scalar Series: 

892 

893 >>> df_0a.squeeze('rows') 

894 a 1 

895 Name: 0, dtype: int64 

896 

897 Squeezing all axes will project directly into a scalar: 

898 

899 >>> df_0a.squeeze() 

900 1 

901 """ 

902 axis = self._AXIS_NAMES if axis is None else (self._get_axis_number(axis),) 

903 return self.iloc[ 

904 tuple( 

905 0 if i in axis and len(a) == 1 else slice(None) 

906 for i, a in enumerate(self.axes) 

907 ) 

908 ] 

909 

910 def swaplevel(self: FrameOrSeries, i=-2, j=-1, axis=0) -> FrameOrSeries: 

911 """ 

912 Swap levels i and j in a MultiIndex on a particular axis 

913 

914 Parameters 

915 ---------- 

916 i, j : int, str (can be mixed) 

917 Level of index to be swapped. Can pass level name as string. 

918 

919 Returns 

920 ------- 

921 swapped : same type as caller (new object) 

922 """ 

923 axis = self._get_axis_number(axis) 

924 result = self.copy() 

925 labels = result._data.axes[axis] 

926 result._data.set_axis(axis, labels.swaplevel(i, j)) 

927 return result 

928 

929 # ---------------------------------------------------------------------- 

930 # Rename 

931 

932 def rename( 

933 self: FrameOrSeries, 

934 mapper: Optional[Renamer] = None, 

935 *, 

936 index: Optional[Renamer] = None, 

937 columns: Optional[Renamer] = None, 

938 axis: Optional[Axis] = None, 

939 copy: bool = True, 

940 inplace: bool = False, 

941 level: Optional[Level] = None, 

942 errors: str = "ignore", 

943 ) -> Optional[FrameOrSeries]: 

944 """ 

945 Alter axes input function or functions. Function / dict values must be 

946 unique (1-to-1). Labels not contained in a dict / Series will be left 

947 as-is. Extra labels listed don't throw an error. Alternatively, change 

948 ``Series.name`` with a scalar value (Series only). 

949 

950 Parameters 

951 ---------- 

952 %(axes)s : scalar, list-like, dict-like or function, optional 

953 Scalar or list-like will alter the ``Series.name`` attribute, 

954 and raise on DataFrame. 

955 dict-like or functions are transformations to apply to 

956 that axis' values 

957 copy : bool, default True 

958 Also copy underlying data. 

959 inplace : bool, default False 

960 Whether to return a new %(klass)s. If True then value of copy is 

961 ignored. 

962 level : int or level name, default None 

963 In case of a MultiIndex, only rename labels in the specified 

964 level. 

965 errors : {'ignore', 'raise'}, default 'ignore' 

966 If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`, 

967 or `columns` contains labels that are not present in the Index 

968 being transformed. 

969 If 'ignore', existing keys will be renamed and extra keys will be 

970 ignored. 

971 

972 Returns 

973 ------- 

974 renamed : %(klass)s (new object) 

975 

976 Raises 

977 ------ 

978 KeyError 

979 If any of the labels is not found in the selected axis and 

980 "errors='raise'". 

981 

982 See Also 

983 -------- 

984 NDFrame.rename_axis 

985 

986 Examples 

987 -------- 

988 

989 >>> s = pd.Series([1, 2, 3]) 

990 >>> s 

991 0 1 

992 1 2 

993 2 3 

994 dtype: int64 

995 >>> s.rename("my_name") # scalar, changes Series.name 

996 0 1 

997 1 2 

998 2 3 

999 Name: my_name, dtype: int64 

1000 >>> s.rename(lambda x: x ** 2) # function, changes labels 

1001 0 1 

1002 1 2 

1003 4 3 

1004 dtype: int64 

1005 >>> s.rename({1: 3, 2: 5}) # mapping, changes labels 

1006 0 1 

1007 3 2 

1008 5 3 

1009 dtype: int64 

1010 

1011 Since ``DataFrame`` doesn't have a ``.name`` attribute, 

1012 only mapping-type arguments are allowed. 

1013 

1014 >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) 

1015 >>> df.rename(2) 

1016 Traceback (most recent call last): 

1017 ... 

1018 TypeError: 'int' object is not callable 

1019 

1020 ``DataFrame.rename`` supports two calling conventions 

1021 

1022 * ``(index=index_mapper, columns=columns_mapper, ...)`` 

1023 * ``(mapper, axis={'index', 'columns'}, ...)`` 

1024 

1025 We *highly* recommend using keyword arguments to clarify your 

1026 intent. 

1027 

1028 >>> df.rename(index=str, columns={"A": "a", "B": "c"}) 

1029 a c 

1030 0 1 4 

1031 1 2 5 

1032 2 3 6 

1033 

1034 >>> df.rename(index=str, columns={"A": "a", "C": "c"}) 

1035 a B 

1036 0 1 4 

1037 1 2 5 

1038 2 3 6 

1039 

1040 Using axis-style parameters 

1041 

1042 >>> df.rename(str.lower, axis='columns') 

1043 a b 

1044 0 1 4 

1045 1 2 5 

1046 2 3 6 

1047 

1048 >>> df.rename({1: 2, 2: 4}, axis='index') 

1049 A B 

1050 0 1 4 

1051 2 2 5 

1052 4 3 6 

1053 

1054 See the :ref:`user guide <basics.rename>` for more. 

1055 """ 

1056 if mapper is None and index is None and columns is None: 

1057 raise TypeError("must pass an index to rename") 

1058 

1059 if index is not None or columns is not None: 

1060 if axis is not None: 

1061 raise TypeError( 

1062 "Cannot specify both 'axis' and any of 'index' or 'columns'" 

1063 ) 

1064 elif mapper is not None: 

1065 raise TypeError( 

1066 "Cannot specify both 'mapper' and any of 'index' or 'columns'" 

1067 ) 

1068 else: 

1069 # use the mapper argument 

1070 if axis and self._get_axis_number(axis) == 1: 

1071 columns = mapper 

1072 else: 

1073 index = mapper 

1074 

1075 result = self if inplace else self.copy(deep=copy) 

1076 

1077 for axis_no, replacements in enumerate((index, columns)): 

1078 if replacements is None: 

1079 continue 

1080 

1081 ax = self._get_axis(axis_no) 

1082 baxis = self._get_block_manager_axis(axis_no) 

1083 f = com.get_rename_function(replacements) 

1084 

1085 if level is not None: 

1086 level = ax._get_level_number(level) 

1087 

1088 # GH 13473 

1089 if not callable(replacements): 

1090 indexer = ax.get_indexer_for(replacements) 

1091 if errors == "raise" and len(indexer[indexer == -1]): 

1092 missing_labels = [ 

1093 label 

1094 for index, label in enumerate(replacements) 

1095 if indexer[index] == -1 

1096 ] 

1097 raise KeyError(f"{missing_labels} not found in axis") 

1098 

1099 result._data = result._data.rename_axis( 

1100 f, axis=baxis, copy=copy, level=level 

1101 ) 

1102 result._clear_item_cache() 

1103 

1104 if inplace: 

1105 self._update_inplace(result._data) 

1106 return None 

1107 else: 

1108 return result.__finalize__(self) 

1109 

1110 @rewrite_axis_style_signature("mapper", [("copy", True), ("inplace", False)]) 

1111 def rename_axis(self, mapper=lib.no_default, **kwargs): 

1112 """ 

1113 Set the name of the axis for the index or columns. 

1114 

1115 Parameters 

1116 ---------- 

1117 mapper : scalar, list-like, optional 

1118 Value to set the axis name attribute. 

1119 index, columns : scalar, list-like, dict-like or function, optional 

1120 A scalar, list-like, dict-like or functions transformations to 

1121 apply to that axis' values. 

1122 

1123 Use either ``mapper`` and ``axis`` to 

1124 specify the axis to target with ``mapper``, or ``index`` 

1125 and/or ``columns``. 

1126 

1127 .. versionchanged:: 0.24.0 

1128 

1129 axis : {0 or 'index', 1 or 'columns'}, default 0 

1130 The axis to rename. 

1131 copy : bool, default True 

1132 Also copy underlying data. 

1133 inplace : bool, default False 

1134 Modifies the object directly, instead of creating a new Series 

1135 or DataFrame. 

1136 

1137 Returns 

1138 ------- 

1139 Series, DataFrame, or None 

1140 The same type as the caller or None if `inplace` is True. 

1141 

1142 See Also 

1143 -------- 

1144 Series.rename : Alter Series index labels or name. 

1145 DataFrame.rename : Alter DataFrame index labels or name. 

1146 Index.rename : Set new names on index. 

1147 

1148 Notes 

1149 ----- 

1150 ``DataFrame.rename_axis`` supports two calling conventions 

1151 

1152 * ``(index=index_mapper, columns=columns_mapper, ...)`` 

1153 * ``(mapper, axis={'index', 'columns'}, ...)`` 

1154 

1155 The first calling convention will only modify the names of 

1156 the index and/or the names of the Index object that is the columns. 

1157 In this case, the parameter ``copy`` is ignored. 

1158 

1159 The second calling convention will modify the names of the 

1160 the corresponding index if mapper is a list or a scalar. 

1161 However, if mapper is dict-like or a function, it will use the 

1162 deprecated behavior of modifying the axis *labels*. 

1163 

1164 We *highly* recommend using keyword arguments to clarify your 

1165 intent. 

1166 

1167 Examples 

1168 -------- 

1169 **Series** 

1170 

1171 >>> s = pd.Series(["dog", "cat", "monkey"]) 

1172 >>> s 

1173 0 dog 

1174 1 cat 

1175 2 monkey 

1176 dtype: object 

1177 >>> s.rename_axis("animal") 

1178 animal 

1179 0 dog 

1180 1 cat 

1181 2 monkey 

1182 dtype: object 

1183 

1184 **DataFrame** 

1185 

1186 >>> df = pd.DataFrame({"num_legs": [4, 4, 2], 

1187 ... "num_arms": [0, 0, 2]}, 

1188 ... ["dog", "cat", "monkey"]) 

1189 >>> df 

1190 num_legs num_arms 

1191 dog 4 0 

1192 cat 4 0 

1193 monkey 2 2 

1194 >>> df = df.rename_axis("animal") 

1195 >>> df 

1196 num_legs num_arms 

1197 animal 

1198 dog 4 0 

1199 cat 4 0 

1200 monkey 2 2 

1201 >>> df = df.rename_axis("limbs", axis="columns") 

1202 >>> df 

1203 limbs num_legs num_arms 

1204 animal 

1205 dog 4 0 

1206 cat 4 0 

1207 monkey 2 2 

1208 

1209 **MultiIndex** 

1210 

1211 >>> df.index = pd.MultiIndex.from_product([['mammal'], 

1212 ... ['dog', 'cat', 'monkey']], 

1213 ... names=['type', 'name']) 

1214 >>> df 

1215 limbs num_legs num_arms 

1216 type name 

1217 mammal dog 4 0 

1218 cat 4 0 

1219 monkey 2 2 

1220 

1221 >>> df.rename_axis(index={'type': 'class'}) 

1222 limbs num_legs num_arms 

1223 class name 

1224 mammal dog 4 0 

1225 cat 4 0 

1226 monkey 2 2 

1227 

1228 >>> df.rename_axis(columns=str.upper) 

1229 LIMBS num_legs num_arms 

1230 type name 

1231 mammal dog 4 0 

1232 cat 4 0 

1233 monkey 2 2 

1234 """ 

1235 axes, kwargs = self._construct_axes_from_arguments( 

1236 (), kwargs, sentinel=lib.no_default 

1237 ) 

1238 copy = kwargs.pop("copy", True) 

1239 inplace = kwargs.pop("inplace", False) 

1240 axis = kwargs.pop("axis", 0) 

1241 if axis is not None: 

1242 axis = self._get_axis_number(axis) 

1243 

1244 if kwargs: 

1245 raise TypeError( 

1246 "rename_axis() got an unexpected keyword " 

1247 f'argument "{list(kwargs.keys())[0]}"' 

1248 ) 

1249 

1250 inplace = validate_bool_kwarg(inplace, "inplace") 

1251 

1252 if mapper is not lib.no_default: 

1253 # Use v0.23 behavior if a scalar or list 

1254 non_mapper = is_scalar(mapper) or ( 

1255 is_list_like(mapper) and not is_dict_like(mapper) 

1256 ) 

1257 if non_mapper: 

1258 return self._set_axis_name(mapper, axis=axis, inplace=inplace) 

1259 else: 

1260 raise ValueError("Use `.rename` to alter labels with a mapper.") 

1261 else: 

1262 # Use new behavior. Means that index and/or columns 

1263 # is specified 

1264 result = self if inplace else self.copy(deep=copy) 

1265 

1266 for axis in range(self._AXIS_LEN): 

1267 v = axes.get(self._AXIS_NAMES[axis]) 

1268 if v is lib.no_default: 

1269 continue 

1270 non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v)) 

1271 if non_mapper: 

1272 newnames = v 

1273 else: 

1274 f = com.get_rename_function(v) 

1275 curnames = self._get_axis(axis).names 

1276 newnames = [f(name) for name in curnames] 

1277 result._set_axis_name(newnames, axis=axis, inplace=True) 

1278 if not inplace: 

1279 return result 

1280 

1281 def _set_axis_name(self, name, axis=0, inplace=False): 

1282 """ 

1283 Set the name(s) of the axis. 

1284 

1285 Parameters 

1286 ---------- 

1287 name : str or list of str 

1288 Name(s) to set. 

1289 axis : {0 or 'index', 1 or 'columns'}, default 0 

1290 The axis to set the label. The value 0 or 'index' specifies index, 

1291 and the value 1 or 'columns' specifies columns. 

1292 inplace : bool, default False 

1293 If `True`, do operation inplace and return None. 

1294 

1295 .. versionadded:: 0.21.0 

1296 

1297 Returns 

1298 ------- 

1299 Series, DataFrame, or None 

1300 The same type as the caller or `None` if `inplace` is `True`. 

1301 

1302 See Also 

1303 -------- 

1304 DataFrame.rename : Alter the axis labels of :class:`DataFrame`. 

1305 Series.rename : Alter the index labels or set the index name 

1306 of :class:`Series`. 

1307 Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`. 

1308 

1309 Examples 

1310 -------- 

1311 >>> df = pd.DataFrame({"num_legs": [4, 4, 2]}, 

1312 ... ["dog", "cat", "monkey"]) 

1313 >>> df 

1314 num_legs 

1315 dog 4 

1316 cat 4 

1317 monkey 2 

1318 >>> df._set_axis_name("animal") 

1319 num_legs 

1320 animal 

1321 dog 4 

1322 cat 4 

1323 monkey 2 

1324 >>> df.index = pd.MultiIndex.from_product( 

1325 ... [["mammal"], ['dog', 'cat', 'monkey']]) 

1326 >>> df._set_axis_name(["type", "name"]) 

1327 legs 

1328 type name 

1329 mammal dog 4 

1330 cat 4 

1331 monkey 2 

1332 """ 

1333 axis = self._get_axis_number(axis) 

1334 idx = self._get_axis(axis).set_names(name) 

1335 

1336 inplace = validate_bool_kwarg(inplace, "inplace") 

1337 renamed = self if inplace else self.copy() 

1338 renamed.set_axis(idx, axis=axis, inplace=True) 

1339 if not inplace: 

1340 return renamed 

1341 

1342 # ---------------------------------------------------------------------- 

1343 # Comparison Methods 

1344 

1345 def _indexed_same(self, other) -> bool: 

1346 return all( 

1347 self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS 

1348 ) 

1349 

1350 def equals(self, other): 

1351 """ 

1352 Test whether two objects contain the same elements. 

1353 

1354 This function allows two Series or DataFrames to be compared against 

1355 each other to see if they have the same shape and elements. NaNs in 

1356 the same location are considered equal. The column headers do not 

1357 need to have the same type, but the elements within the columns must 

1358 be the same dtype. 

1359 

1360 Parameters 

1361 ---------- 

1362 other : Series or DataFrame 

1363 The other Series or DataFrame to be compared with the first. 

1364 

1365 Returns 

1366 ------- 

1367 bool 

1368 True if all elements are the same in both objects, False 

1369 otherwise. 

1370 

1371 See Also 

1372 -------- 

1373 Series.eq : Compare two Series objects of the same length 

1374 and return a Series where each element is True if the element 

1375 in each Series is equal, False otherwise. 

1376 DataFrame.eq : Compare two DataFrame objects of the same shape and 

1377 return a DataFrame where each element is True if the respective 

1378 element in each DataFrame is equal, False otherwise. 

1379 testing.assert_series_equal : Raises an AssertionError if left and 

1380 right are not equal. Provides an easy interface to ignore 

1381 inequality in dtypes, indexes and precision among others. 

1382 testing.assert_frame_equal : Like assert_series_equal, but targets 

1383 DataFrames. 

1384 numpy.array_equal : Return True if two arrays have the same shape 

1385 and elements, False otherwise. 

1386 

1387 Notes 

1388 ----- 

1389 This function requires that the elements have the same dtype as their 

1390 respective elements in the other Series or DataFrame. However, the 

1391 column labels do not need to have the same type, as long as they are 

1392 still considered equal. 

1393 

1394 Examples 

1395 -------- 

1396 >>> df = pd.DataFrame({1: [10], 2: [20]}) 

1397 >>> df 

1398 1 2 

1399 0 10 20 

1400 

1401 DataFrames df and exactly_equal have the same types and values for 

1402 their elements and column labels, which will return True. 

1403 

1404 >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]}) 

1405 >>> exactly_equal 

1406 1 2 

1407 0 10 20 

1408 >>> df.equals(exactly_equal) 

1409 True 

1410 

1411 DataFrames df and different_column_type have the same element 

1412 types and values, but have different types for the column labels, 

1413 which will still return True. 

1414 

1415 >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]}) 

1416 >>> different_column_type 

1417 1.0 2.0 

1418 0 10 20 

1419 >>> df.equals(different_column_type) 

1420 True 

1421 

1422 DataFrames df and different_data_type have different types for the 

1423 same values for their elements, and will return False even though 

1424 their column labels are the same values and types. 

1425 

1426 >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]}) 

1427 >>> different_data_type 

1428 1 2 

1429 0 10.0 20.0 

1430 >>> df.equals(different_data_type) 

1431 False 

1432 """ 

1433 if not isinstance(other, self._constructor): 

1434 return False 

1435 return self._data.equals(other._data) 

1436 

1437 # ------------------------------------------------------------------------- 

1438 # Unary Methods 

1439 

1440 def __neg__(self): 

1441 values = com.values_from_object(self) 

1442 if is_bool_dtype(values): 

1443 arr = operator.inv(values) 

1444 elif ( 

1445 is_numeric_dtype(values) 

1446 or is_timedelta64_dtype(values) 

1447 or is_object_dtype(values) 

1448 ): 

1449 arr = operator.neg(values) 

1450 else: 

1451 raise TypeError(f"Unary negative expects numeric dtype, not {values.dtype}") 

1452 return self.__array_wrap__(arr) 

1453 

1454 def __pos__(self): 

1455 values = com.values_from_object(self) 

1456 if is_bool_dtype(values) or is_period_arraylike(values): 

1457 arr = values 

1458 elif ( 

1459 is_numeric_dtype(values) 

1460 or is_timedelta64_dtype(values) 

1461 or is_object_dtype(values) 

1462 ): 

1463 arr = operator.pos(values) 

1464 else: 

1465 raise TypeError(f"Unary plus expects numeric dtype, not {values.dtype}") 

1466 return self.__array_wrap__(arr) 

1467 

1468 def __invert__(self): 

1469 if not self.size: 

1470 # inv fails with 0 len 

1471 return self 

1472 

1473 new_data = self._data.apply(operator.invert) 

1474 result = self._constructor(new_data).__finalize__(self) 

1475 return result 

1476 

1477 def __nonzero__(self): 

1478 raise ValueError( 

1479 f"The truth value of a {type(self).__name__} is ambiguous. " 

1480 "Use a.empty, a.bool(), a.item(), a.any() or a.all()." 

1481 ) 

1482 

1483 __bool__ = __nonzero__ 

1484 

1485 def bool(self): 

1486 """ 

1487 Return the bool of a single element PandasObject. 

1488 

1489 This must be a boolean scalar value, either True or False. Raise a 

1490 ValueError if the PandasObject does not have exactly 1 element, or that 

1491 element is not boolean 

1492 

1493 Returns 

1494 ------- 

1495 bool 

1496 Same single boolean value converted to bool type. 

1497 """ 

1498 v = self.squeeze() 

1499 if isinstance(v, (bool, np.bool_)): 

1500 return bool(v) 

1501 elif is_scalar(v): 

1502 raise ValueError( 

1503 "bool cannot act on a non-boolean single element " 

1504 f"{type(self).__name__}" 

1505 ) 

1506 

1507 self.__nonzero__() 

1508 

1509 def __abs__(self: FrameOrSeries) -> FrameOrSeries: 

1510 return self.abs() 

1511 

1512 def __round__(self: FrameOrSeries, decimals: int = 0) -> FrameOrSeries: 

1513 return self.round(decimals) 

1514 

1515 # ------------------------------------------------------------------------- 

1516 # Label or Level Combination Helpers 

1517 # 

1518 # A collection of helper methods for DataFrame/Series operations that 

1519 # accept a combination of column/index labels and levels. All such 

1520 # operations should utilize/extend these methods when possible so that we 

1521 # have consistent precedence and validation logic throughout the library. 

1522 

1523 def _is_level_reference(self, key, axis=0): 

1524 """ 

1525 Test whether a key is a level reference for a given axis. 

1526 

1527 To be considered a level reference, `key` must be a string that: 

1528 - (axis=0): Matches the name of an index level and does NOT match 

1529 a column label. 

1530 - (axis=1): Matches the name of a column level and does NOT match 

1531 an index label. 

1532 

1533 Parameters 

1534 ---------- 

1535 key : str 

1536 Potential level name for the given axis 

1537 axis : int, default 0 

1538 Axis that levels are associated with (0 for index, 1 for columns) 

1539 

1540 Returns 

1541 ------- 

1542 is_level : bool 

1543 """ 

1544 axis = self._get_axis_number(axis) 

1545 

1546 return ( 

1547 key is not None 

1548 and is_hashable(key) 

1549 and key in self.axes[axis].names 

1550 and not self._is_label_reference(key, axis=axis) 

1551 ) 

1552 

1553 def _is_label_reference(self, key, axis=0) -> bool_t: 

1554 """ 

1555 Test whether a key is a label reference for a given axis. 

1556 

1557 To be considered a label reference, `key` must be a string that: 

1558 - (axis=0): Matches a column label 

1559 - (axis=1): Matches an index label 

1560 

1561 Parameters 

1562 ---------- 

1563 key: str 

1564 Potential label name 

1565 axis: int, default 0 

1566 Axis perpendicular to the axis that labels are associated with 

1567 (0 means search for column labels, 1 means search for index labels) 

1568 

1569 Returns 

1570 ------- 

1571 is_label: bool 

1572 """ 

1573 axis = self._get_axis_number(axis) 

1574 other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis) 

1575 

1576 return ( 

1577 key is not None 

1578 and is_hashable(key) 

1579 and any(key in self.axes[ax] for ax in other_axes) 

1580 ) 

1581 

1582 def _is_label_or_level_reference(self, key: str, axis: int = 0) -> bool_t: 

1583 """ 

1584 Test whether a key is a label or level reference for a given axis. 

1585 

1586 To be considered either a label or a level reference, `key` must be a 

1587 string that: 

1588 - (axis=0): Matches a column label or an index level 

1589 - (axis=1): Matches an index label or a column level 

1590 

1591 Parameters 

1592 ---------- 

1593 key: str 

1594 Potential label or level name 

1595 axis: int, default 0 

1596 Axis that levels are associated with (0 for index, 1 for columns) 

1597 

1598 Returns 

1599 ------- 

1600 is_label_or_level: bool 

1601 """ 

1602 return self._is_level_reference(key, axis=axis) or self._is_label_reference( 

1603 key, axis=axis 

1604 ) 

1605 

1606 def _check_label_or_level_ambiguity(self, key, axis: int = 0) -> None: 

1607 """ 

1608 Check whether `key` is ambiguous. 

1609 

1610 By ambiguous, we mean that it matches both a level of the input 

1611 `axis` and a label of the other axis. 

1612 

1613 Parameters 

1614 ---------- 

1615 key: str or object 

1616 Label or level name. 

1617 axis: int, default 0 

1618 Axis that levels are associated with (0 for index, 1 for columns). 

1619 

1620 Raises 

1621 ------ 

1622 ValueError: `key` is ambiguous 

1623 """ 

1624 axis = self._get_axis_number(axis) 

1625 other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis) 

1626 

1627 if ( 

1628 key is not None 

1629 and is_hashable(key) 

1630 and key in self.axes[axis].names 

1631 and any(key in self.axes[ax] for ax in other_axes) 

1632 ): 

1633 

1634 # Build an informative and grammatical warning 

1635 level_article, level_type = ( 

1636 ("an", "index") if axis == 0 else ("a", "column") 

1637 ) 

1638 

1639 label_article, label_type = ( 

1640 ("a", "column") if axis == 0 else ("an", "index") 

1641 ) 

1642 

1643 msg = ( 

1644 f"'{key}' is both {level_article} {level_type} level and " 

1645 f"{label_article} {label_type} label, which is ambiguous." 

1646 ) 

1647 raise ValueError(msg) 

1648 

1649 def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray: 

1650 """ 

1651 Return a 1-D array of values associated with `key`, a label or level 

1652 from the given `axis`. 

1653 

1654 Retrieval logic: 

1655 - (axis=0): Return column values if `key` matches a column label. 

1656 Otherwise return index level values if `key` matches an index 

1657 level. 

1658 - (axis=1): Return row values if `key` matches an index label. 

1659 Otherwise return column level values if 'key' matches a column 

1660 level 

1661 

1662 Parameters 

1663 ---------- 

1664 key: str 

1665 Label or level name. 

1666 axis: int, default 0 

1667 Axis that levels are associated with (0 for index, 1 for columns) 

1668 

1669 Returns 

1670 ------- 

1671 values: np.ndarray 

1672 

1673 Raises 

1674 ------ 

1675 KeyError 

1676 if `key` matches neither a label nor a level 

1677 ValueError 

1678 if `key` matches multiple labels 

1679 FutureWarning 

1680 if `key` is ambiguous. This will become an ambiguity error in a 

1681 future version 

1682 """ 

1683 axis = self._get_axis_number(axis) 

1684 other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis] 

1685 

1686 if self._is_label_reference(key, axis=axis): 

1687 self._check_label_or_level_ambiguity(key, axis=axis) 

1688 values = self.xs(key, axis=other_axes[0])._values 

1689 elif self._is_level_reference(key, axis=axis): 

1690 values = self.axes[axis].get_level_values(key)._values 

1691 else: 

1692 raise KeyError(key) 

1693 

1694 # Check for duplicates 

1695 if values.ndim > 1: 

1696 

1697 if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex): 

1698 multi_message = ( 

1699 "\n" 

1700 "For a multi-index, the label must be a " 

1701 "tuple with elements corresponding to " 

1702 "each level." 

1703 ) 

1704 else: 

1705 multi_message = "" 

1706 

1707 label_axis_name = "column" if axis == 0 else "index" 

1708 raise ValueError( 

1709 ( 

1710 f"The {label_axis_name} label '{key}' " 

1711 f"is not unique.{multi_message}" 

1712 ) 

1713 ) 

1714 

1715 return values 

1716 

1717 def _drop_labels_or_levels(self, keys, axis: int = 0): 

1718 """ 

1719 Drop labels and/or levels for the given `axis`. 

1720 

1721 For each key in `keys`: 

1722 - (axis=0): If key matches a column label then drop the column. 

1723 Otherwise if key matches an index level then drop the level. 

1724 - (axis=1): If key matches an index label then drop the row. 

1725 Otherwise if key matches a column level then drop the level. 

1726 

1727 Parameters 

1728 ---------- 

1729 keys: str or list of str 

1730 labels or levels to drop 

1731 axis: int, default 0 

1732 Axis that levels are associated with (0 for index, 1 for columns) 

1733 

1734 Returns 

1735 ------- 

1736 dropped: DataFrame 

1737 

1738 Raises 

1739 ------ 

1740 ValueError 

1741 if any `keys` match neither a label nor a level 

1742 """ 

1743 axis = self._get_axis_number(axis) 

1744 

1745 # Validate keys 

1746 keys = com.maybe_make_list(keys) 

1747 invalid_keys = [ 

1748 k for k in keys if not self._is_label_or_level_reference(k, axis=axis) 

1749 ] 

1750 

1751 if invalid_keys: 

1752 raise ValueError( 

1753 ( 

1754 "The following keys are not valid labels or " 

1755 f"levels for axis {axis}: {invalid_keys}" 

1756 ) 

1757 ) 

1758 

1759 # Compute levels and labels to drop 

1760 levels_to_drop = [k for k in keys if self._is_level_reference(k, axis=axis)] 

1761 

1762 labels_to_drop = [k for k in keys if not self._is_level_reference(k, axis=axis)] 

1763 

1764 # Perform copy upfront and then use inplace operations below. 

1765 # This ensures that we always perform exactly one copy. 

1766 # ``copy`` and/or ``inplace`` options could be added in the future. 

1767 dropped = self.copy() 

1768 

1769 if axis == 0: 

1770 # Handle dropping index levels 

1771 if levels_to_drop: 

1772 dropped.reset_index(levels_to_drop, drop=True, inplace=True) 

1773 

1774 # Handle dropping columns labels 

1775 if labels_to_drop: 

1776 dropped.drop(labels_to_drop, axis=1, inplace=True) 

1777 else: 

1778 # Handle dropping column levels 

1779 if levels_to_drop: 

1780 if isinstance(dropped.columns, MultiIndex): 

1781 # Drop the specified levels from the MultiIndex 

1782 dropped.columns = dropped.columns.droplevel(levels_to_drop) 

1783 else: 

1784 # Drop the last level of Index by replacing with 

1785 # a RangeIndex 

1786 dropped.columns = RangeIndex(dropped.columns.size) 

1787 

1788 # Handle dropping index labels 

1789 if labels_to_drop: 

1790 dropped.drop(labels_to_drop, axis=0, inplace=True) 

1791 

1792 return dropped 

1793 

1794 # ---------------------------------------------------------------------- 

1795 # Iteration 

1796 

1797 def __hash__(self): 

1798 raise TypeError( 

1799 f"{repr(type(self).__name__)} objects are mutable, " 

1800 f"thus they cannot be hashed" 

1801 ) 

1802 

1803 def __iter__(self): 

1804 """ 

1805 Iterate over info axis. 

1806 

1807 Returns 

1808 ------- 

1809 iterator 

1810 Info axis as iterator. 

1811 """ 

1812 return iter(self._info_axis) 

1813 

1814 # can we get a better explanation of this? 

1815 def keys(self): 

1816 """ 

1817 Get the 'info axis' (see Indexing for more). 

1818 

1819 This is index for Series, columns for DataFrame. 

1820 

1821 Returns 

1822 ------- 

1823 Index 

1824 Info axis. 

1825 """ 

1826 return self._info_axis 

1827 

1828 def items(self): 

1829 """Iterate over (label, values) on info axis 

1830 

1831 This is index for Series and columns for DataFrame. 

1832 

1833 Returns 

1834 ------- 

1835 Generator 

1836 """ 

1837 for h in self._info_axis: 

1838 yield h, self[h] 

1839 

1840 @Appender(items.__doc__) 

1841 def iteritems(self): 

1842 return self.items() 

1843 

1844 def __len__(self) -> int: 

1845 """Returns length of info axis""" 

1846 return len(self._info_axis) 

1847 

1848 def __contains__(self, key) -> bool_t: 

1849 """True if the key is in the info axis""" 

1850 return key in self._info_axis 

1851 

1852 @property 

1853 def empty(self) -> bool_t: 

1854 """ 

1855 Indicator whether DataFrame is empty. 

1856 

1857 True if DataFrame is entirely empty (no items), meaning any of the 

1858 axes are of length 0. 

1859 

1860 Returns 

1861 ------- 

1862 bool 

1863 If DataFrame is empty, return True, if not return False. 

1864 

1865 See Also 

1866 -------- 

1867 Series.dropna 

1868 DataFrame.dropna 

1869 

1870 Notes 

1871 ----- 

1872 If DataFrame contains only NaNs, it is still not considered empty. See 

1873 the example below. 

1874 

1875 Examples 

1876 -------- 

1877 An example of an actual empty DataFrame. Notice the index is empty: 

1878 

1879 >>> df_empty = pd.DataFrame({'A' : []}) 

1880 >>> df_empty 

1881 Empty DataFrame 

1882 Columns: [A] 

1883 Index: [] 

1884 >>> df_empty.empty 

1885 True 

1886 

1887 If we only have NaNs in our DataFrame, it is not considered empty! We 

1888 will need to drop the NaNs to make the DataFrame empty: 

1889 

1890 >>> df = pd.DataFrame({'A' : [np.nan]}) 

1891 >>> df 

1892 A 

1893 0 NaN 

1894 >>> df.empty 

1895 False 

1896 >>> df.dropna().empty 

1897 True 

1898 """ 

1899 return any(len(self._get_axis(a)) == 0 for a in self._AXIS_ORDERS) 

1900 

1901 # ---------------------------------------------------------------------- 

1902 # Array Interface 

1903 

1904 # This is also set in IndexOpsMixin 

1905 # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented 

1906 __array_priority__ = 1000 

1907 

1908 def __array__(self, dtype=None) -> np.ndarray: 

1909 return com.values_from_object(self) 

1910 

1911 def __array_wrap__(self, result, context=None): 

1912 result = lib.item_from_zerodim(result) 

1913 if is_scalar(result): 

1914 # e.g. we get here with np.ptp(series) 

1915 # ptp also requires the item_from_zerodim 

1916 return result 

1917 d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) 

1918 return self._constructor(result, **d).__finalize__(self) 

1919 

1920 # ideally we would define this to avoid the getattr checks, but 

1921 # is slower 

1922 # @property 

1923 # def __array_interface__(self): 

1924 # """ provide numpy array interface method """ 

1925 # values = self.values 

1926 # return dict(typestr=values.dtype.str,shape=values.shape,data=values) 

1927 

1928 # ---------------------------------------------------------------------- 

1929 # Picklability 

1930 

1931 def __getstate__(self) -> Dict[str, Any]: 

1932 meta = {k: getattr(self, k, None) for k in self._metadata} 

1933 return dict( 

1934 _data=self._data, 

1935 _typ=self._typ, 

1936 _metadata=self._metadata, 

1937 attrs=self.attrs, 

1938 **meta, 

1939 ) 

1940 

1941 def __setstate__(self, state): 

1942 

1943 if isinstance(state, BlockManager): 

1944 self._data = state 

1945 elif isinstance(state, dict): 

1946 typ = state.get("_typ") 

1947 if typ is not None: 

1948 attrs = state.get("_attrs", {}) 

1949 object.__setattr__(self, "_attrs", attrs) 

1950 

1951 # set in the order of internal names 

1952 # to avoid definitional recursion 

1953 # e.g. say fill_value needing _data to be 

1954 # defined 

1955 meta = set(self._internal_names + self._metadata) 

1956 for k in list(meta): 

1957 if k in state: 

1958 v = state[k] 

1959 object.__setattr__(self, k, v) 

1960 

1961 for k, v in state.items(): 

1962 if k not in meta: 

1963 object.__setattr__(self, k, v) 

1964 

1965 else: 

1966 self._unpickle_series_compat(state) 

1967 elif len(state) == 2: 

1968 self._unpickle_series_compat(state) 

1969 

1970 self._item_cache = {} 

1971 

1972 # ---------------------------------------------------------------------- 

1973 # Rendering Methods 

1974 

1975 def __repr__(self) -> str: 

1976 # string representation based upon iterating over self 

1977 # (since, by definition, `PandasContainers` are iterable) 

1978 prepr = f"[{','.join(map(pprint_thing, self))}]" 

1979 return f"{type(self).__name__}({prepr})" 

1980 

1981 def _repr_latex_(self): 

1982 """ 

1983 Returns a LaTeX representation for a particular object. 

1984 Mainly for use with nbconvert (jupyter notebook conversion to pdf). 

1985 """ 

1986 if config.get_option("display.latex.repr"): 

1987 return self.to_latex() 

1988 else: 

1989 return None 

1990 

1991 def _repr_data_resource_(self): 

1992 """ 

1993 Not a real Jupyter special repr method, but we use the same 

1994 naming convention. 

1995 """ 

1996 if config.get_option("display.html.table_schema"): 

1997 data = self.head(config.get_option("display.max_rows")) 

1998 payload = json.loads( 

1999 data.to_json(orient="table"), object_pairs_hook=collections.OrderedDict 

2000 ) 

2001 return payload 

2002 

2003 # ---------------------------------------------------------------------- 

2004 # I/O Methods 

2005 

2006 _shared_docs[ 

2007 "to_markdown" 

2008 ] = """ 

2009 Print %(klass)s in Markdown-friendly format. 

2010 

2011 .. versionadded:: 1.0.0 

2012 

2013 Parameters 

2014 ---------- 

2015 buf : writable buffer, defaults to sys.stdout 

2016 Where to send the output. By default, the output is printed to 

2017 sys.stdout. Pass a writable buffer if you need to further process 

2018 the output. 

2019 mode : str, optional 

2020 Mode in which file is opened. 

2021 **kwargs 

2022 These parameters will be passed to `tabulate`. 

2023 

2024 Returns 

2025 ------- 

2026 str 

2027 %(klass)s in Markdown-friendly format. 

2028 """ 

2029 

2030 _shared_docs[ 

2031 "to_excel" 

2032 ] = """ 

2033 Write %(klass)s to an Excel sheet. 

2034 

2035 To write a single %(klass)s to an Excel .xlsx file it is only necessary to 

2036 specify a target file name. To write to multiple sheets it is necessary to 

2037 create an `ExcelWriter` object with a target file name, and specify a sheet 

2038 in the file to write to. 

2039 

2040 Multiple sheets may be written to by specifying unique `sheet_name`. 

2041 With all data written to the file it is necessary to save the changes. 

2042 Note that creating an `ExcelWriter` object with a file name that already 

2043 exists will result in the contents of the existing file being erased. 

2044 

2045 Parameters 

2046 ---------- 

2047 excel_writer : str or ExcelWriter object 

2048 File path or existing ExcelWriter. 

2049 sheet_name : str, default 'Sheet1' 

2050 Name of sheet which will contain DataFrame. 

2051 na_rep : str, default '' 

2052 Missing data representation. 

2053 float_format : str, optional 

2054 Format string for floating point numbers. For example 

2055 ``float_format="%%.2f"`` will format 0.1234 to 0.12. 

2056 columns : sequence or list of str, optional 

2057 Columns to write. 

2058 header : bool or list of str, default True 

2059 Write out the column names. If a list of string is given it is 

2060 assumed to be aliases for the column names. 

2061 index : bool, default True 

2062 Write row names (index). 

2063 index_label : str or sequence, optional 

2064 Column label for index column(s) if desired. If not specified, and 

2065 `header` and `index` are True, then the index names are used. A 

2066 sequence should be given if the DataFrame uses MultiIndex. 

2067 startrow : int, default 0 

2068 Upper left cell row to dump data frame. 

2069 startcol : int, default 0 

2070 Upper left cell column to dump data frame. 

2071 engine : str, optional 

2072 Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this 

2073 via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and 

2074 ``io.excel.xlsm.writer``. 

2075 merge_cells : bool, default True 

2076 Write MultiIndex and Hierarchical Rows as merged cells. 

2077 encoding : str, optional 

2078 Encoding of the resulting excel file. Only necessary for xlwt, 

2079 other writers support unicode natively. 

2080 inf_rep : str, default 'inf' 

2081 Representation for infinity (there is no native representation for 

2082 infinity in Excel). 

2083 verbose : bool, default True 

2084 Display more information in the error logs. 

2085 freeze_panes : tuple of int (length 2), optional 

2086 Specifies the one-based bottommost row and rightmost column that 

2087 is to be frozen. 

2088 

2089 See Also 

2090 -------- 

2091 to_csv : Write DataFrame to a comma-separated values (csv) file. 

2092 ExcelWriter : Class for writing DataFrame objects into excel sheets. 

2093 read_excel : Read an Excel file into a pandas DataFrame. 

2094 read_csv : Read a comma-separated values (csv) file into DataFrame. 

2095 

2096 Notes 

2097 ----- 

2098 For compatibility with :meth:`~DataFrame.to_csv`, 

2099 to_excel serializes lists and dicts to strings before writing. 

2100 

2101 Once a workbook has been saved it is not possible write further data 

2102 without rewriting the whole workbook. 

2103 

2104 Examples 

2105 -------- 

2106 

2107 Create, write to and save a workbook: 

2108 

2109 >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']], 

2110 ... index=['row 1', 'row 2'], 

2111 ... columns=['col 1', 'col 2']) 

2112 >>> df1.to_excel("output.xlsx") # doctest: +SKIP 

2113 

2114 To specify the sheet name: 

2115 

2116 >>> df1.to_excel("output.xlsx", 

2117 ... sheet_name='Sheet_name_1') # doctest: +SKIP 

2118 

2119 If you wish to write to more than one sheet in the workbook, it is 

2120 necessary to specify an ExcelWriter object: 

2121 

2122 >>> df2 = df1.copy() 

2123 >>> with pd.ExcelWriter('output.xlsx') as writer: # doctest: +SKIP 

2124 ... df1.to_excel(writer, sheet_name='Sheet_name_1') 

2125 ... df2.to_excel(writer, sheet_name='Sheet_name_2') 

2126 

2127 ExcelWriter can also be used to append to an existing Excel file: 

2128 

2129 >>> with pd.ExcelWriter('output.xlsx', 

2130 ... mode='a') as writer: # doctest: +SKIP 

2131 ... df.to_excel(writer, sheet_name='Sheet_name_3') 

2132 

2133 To set the library that is used to write the Excel file, 

2134 you can pass the `engine` keyword (the default engine is 

2135 automatically chosen depending on the file extension): 

2136 

2137 >>> df1.to_excel('output1.xlsx', engine='xlsxwriter') # doctest: +SKIP 

2138 """ 

2139 

2140 @Appender(_shared_docs["to_excel"] % dict(klass="object")) 

2141 def to_excel( 

2142 self, 

2143 excel_writer, 

2144 sheet_name="Sheet1", 

2145 na_rep="", 

2146 float_format=None, 

2147 columns=None, 

2148 header=True, 

2149 index=True, 

2150 index_label=None, 

2151 startrow=0, 

2152 startcol=0, 

2153 engine=None, 

2154 merge_cells=True, 

2155 encoding=None, 

2156 inf_rep="inf", 

2157 verbose=True, 

2158 freeze_panes=None, 

2159 ) -> None: 

2160 df = self if isinstance(self, ABCDataFrame) else self.to_frame() 

2161 

2162 from pandas.io.formats.excel import ExcelFormatter 

2163 

2164 formatter = ExcelFormatter( 

2165 df, 

2166 na_rep=na_rep, 

2167 cols=columns, 

2168 header=header, 

2169 float_format=float_format, 

2170 index=index, 

2171 index_label=index_label, 

2172 merge_cells=merge_cells, 

2173 inf_rep=inf_rep, 

2174 ) 

2175 formatter.write( 

2176 excel_writer, 

2177 sheet_name=sheet_name, 

2178 startrow=startrow, 

2179 startcol=startcol, 

2180 freeze_panes=freeze_panes, 

2181 engine=engine, 

2182 ) 

2183 

2184 def to_json( 

2185 self, 

2186 path_or_buf: Optional[FilePathOrBuffer] = None, 

2187 orient: Optional[str] = None, 

2188 date_format: Optional[str] = None, 

2189 double_precision: int = 10, 

2190 force_ascii: bool_t = True, 

2191 date_unit: str = "ms", 

2192 default_handler: Optional[Callable[[Any], JSONSerializable]] = None, 

2193 lines: bool_t = False, 

2194 compression: Optional[str] = "infer", 

2195 index: bool_t = True, 

2196 indent: Optional[int] = None, 

2197 ) -> Optional[str]: 

2198 """ 

2199 Convert the object to a JSON string. 

2200 

2201 Note NaN's and None will be converted to null and datetime objects 

2202 will be converted to UNIX timestamps. 

2203 

2204 Parameters 

2205 ---------- 

2206 path_or_buf : str or file handle, optional 

2207 File path or object. If not specified, the result is returned as 

2208 a string. 

2209 orient : str 

2210 Indication of expected JSON string format. 

2211 

2212 * Series: 

2213 

2214 - default is 'index' 

2215 - allowed values are: {'split','records','index','table'}. 

2216 

2217 * DataFrame: 

2218 

2219 - default is 'columns' 

2220 - allowed values are: {'split', 'records', 'index', 'columns', 

2221 'values', 'table'}. 

2222 

2223 * The format of the JSON string: 

2224 

2225 - 'split' : dict like {'index' -> [index], 'columns' -> [columns], 

2226 'data' -> [values]} 

2227 - 'records' : list like [{column -> value}, ... , {column -> value}] 

2228 - 'index' : dict like {index -> {column -> value}} 

2229 - 'columns' : dict like {column -> {index -> value}} 

2230 - 'values' : just the values array 

2231 - 'table' : dict like {'schema': {schema}, 'data': {data}} 

2232 

2233 Describing the data, where data component is like ``orient='records'``. 

2234 

2235 .. versionchanged:: 0.20.0 

2236 

2237 date_format : {None, 'epoch', 'iso'} 

2238 Type of date conversion. 'epoch' = epoch milliseconds, 

2239 'iso' = ISO8601. The default depends on the `orient`. For 

2240 ``orient='table'``, the default is 'iso'. For all other orients, 

2241 the default is 'epoch'. 

2242 double_precision : int, default 10 

2243 The number of decimal places to use when encoding 

2244 floating point values. 

2245 force_ascii : bool, default True 

2246 Force encoded string to be ASCII. 

2247 date_unit : str, default 'ms' (milliseconds) 

2248 The time unit to encode to, governs timestamp and ISO8601 

2249 precision. One of 's', 'ms', 'us', 'ns' for second, millisecond, 

2250 microsecond, and nanosecond respectively. 

2251 default_handler : callable, default None 

2252 Handler to call if object cannot otherwise be converted to a 

2253 suitable format for JSON. Should receive a single argument which is 

2254 the object to convert and return a serialisable object. 

2255 lines : bool, default False 

2256 If 'orient' is 'records' write out line delimited json format. Will 

2257 throw ValueError if incorrect 'orient' since others are not list 

2258 like. 

2259 

2260 compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} 

2261 

2262 A string representing the compression to use in the output file, 

2263 only used when the first argument is a filename. By default, the 

2264 compression is inferred from the filename. 

2265 

2266 .. versionadded:: 0.21.0 

2267 .. versionchanged:: 0.24.0 

2268 'infer' option added and set to default 

2269 index : bool, default True 

2270 Whether to include the index values in the JSON string. Not 

2271 including the index (``index=False``) is only supported when 

2272 orient is 'split' or 'table'. 

2273 

2274 .. versionadded:: 0.23.0 

2275 

2276 indent : int, optional 

2277 Length of whitespace used to indent each record. 

2278 

2279 .. versionadded:: 1.0.0 

2280 

2281 Returns 

2282 ------- 

2283 None or str 

2284 If path_or_buf is None, returns the resulting json format as a 

2285 string. Otherwise returns None. 

2286 

2287 See Also 

2288 -------- 

2289 read_json 

2290 

2291 Notes 

2292 ----- 

2293 The behavior of ``indent=0`` varies from the stdlib, which does not 

2294 indent the output but does insert newlines. Currently, ``indent=0`` 

2295 and the default ``indent=None`` are equivalent in pandas, though this 

2296 may change in a future release. 

2297 

2298 Examples 

2299 -------- 

2300 

2301 >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], 

2302 ... index=['row 1', 'row 2'], 

2303 ... columns=['col 1', 'col 2']) 

2304 >>> df.to_json(orient='split') 

2305 '{"columns":["col 1","col 2"], 

2306 "index":["row 1","row 2"], 

2307 "data":[["a","b"],["c","d"]]}' 

2308 

2309 Encoding/decoding a Dataframe using ``'records'`` formatted JSON. 

2310 Note that index labels are not preserved with this encoding. 

2311 

2312 >>> df.to_json(orient='records') 

2313 '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' 

2314 

2315 Encoding/decoding a Dataframe using ``'index'`` formatted JSON: 

2316 

2317 >>> df.to_json(orient='index') 

2318 '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' 

2319 

2320 Encoding/decoding a Dataframe using ``'columns'`` formatted JSON: 

2321 

2322 >>> df.to_json(orient='columns') 

2323 '{"col 1":{"row 1":"a","row 2":"c"},"col 2":{"row 1":"b","row 2":"d"}}' 

2324 

2325 Encoding/decoding a Dataframe using ``'values'`` formatted JSON: 

2326 

2327 >>> df.to_json(orient='values') 

2328 '[["a","b"],["c","d"]]' 

2329 

2330 Encoding with Table Schema 

2331 

2332 >>> df.to_json(orient='table') 

2333 '{"schema": {"fields": [{"name": "index", "type": "string"}, 

2334 {"name": "col 1", "type": "string"}, 

2335 {"name": "col 2", "type": "string"}], 

2336 "primaryKey": "index", 

2337 "pandas_version": "0.20.0"}, 

2338 "data": [{"index": "row 1", "col 1": "a", "col 2": "b"}, 

2339 {"index": "row 2", "col 1": "c", "col 2": "d"}]}' 

2340 """ 

2341 

2342 from pandas.io import json 

2343 

2344 if date_format is None and orient == "table": 

2345 date_format = "iso" 

2346 elif date_format is None: 

2347 date_format = "epoch" 

2348 

2349 config.is_nonnegative_int(indent) 

2350 indent = indent or 0 

2351 

2352 return json.to_json( 

2353 path_or_buf=path_or_buf, 

2354 obj=self, 

2355 orient=orient, 

2356 date_format=date_format, 

2357 double_precision=double_precision, 

2358 force_ascii=force_ascii, 

2359 date_unit=date_unit, 

2360 default_handler=default_handler, 

2361 lines=lines, 

2362 compression=compression, 

2363 index=index, 

2364 indent=indent, 

2365 ) 

2366 

2367 def to_hdf( 

2368 self, 

2369 path_or_buf, 

2370 key: str, 

2371 mode: str = "a", 

2372 complevel: Optional[int] = None, 

2373 complib: Optional[str] = None, 

2374 append: bool_t = False, 

2375 format: Optional[str] = None, 

2376 index: bool_t = True, 

2377 min_itemsize: Optional[Union[int, Dict[str, int]]] = None, 

2378 nan_rep=None, 

2379 dropna: Optional[bool_t] = None, 

2380 data_columns: Optional[List[str]] = None, 

2381 errors: str = "strict", 

2382 encoding: str = "UTF-8", 

2383 ) -> None: 

2384 """ 

2385 Write the contained data to an HDF5 file using HDFStore. 

2386 

2387 Hierarchical Data Format (HDF) is self-describing, allowing an 

2388 application to interpret the structure and contents of a file with 

2389 no outside information. One HDF file can hold a mix of related objects 

2390 which can be accessed as a group or as individual objects. 

2391 

2392 In order to add another DataFrame or Series to an existing HDF file 

2393 please use append mode and a different a key. 

2394 

2395 For more information see the :ref:`user guide <io.hdf5>`. 

2396 

2397 Parameters 

2398 ---------- 

2399 path_or_buf : str or pandas.HDFStore 

2400 File path or HDFStore object. 

2401 key : str 

2402 Identifier for the group in the store. 

2403 mode : {'a', 'w', 'r+'}, default 'a' 

2404 Mode to open file: 

2405 

2406 - 'w': write, a new file is created (an existing file with 

2407 the same name would be deleted). 

2408 - 'a': append, an existing file is opened for reading and 

2409 writing, and if the file does not exist it is created. 

2410 - 'r+': similar to 'a', but the file must already exist. 

2411 complevel : {0-9}, optional 

2412 Specifies a compression level for data. 

2413 A value of 0 disables compression. 

2414 complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' 

2415 Specifies the compression library to be used. 

2416 As of v0.20.2 these additional compressors for Blosc are supported 

2417 (default if no compressor specified: 'blosc:blosclz'): 

2418 {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 

2419 'blosc:zlib', 'blosc:zstd'}. 

2420 Specifying a compression library which is not available issues 

2421 a ValueError. 

2422 append : bool, default False 

2423 For Table formats, append the input data to the existing. 

2424 format : {'fixed', 'table', None}, default 'fixed' 

2425 Possible values: 

2426 

2427 - 'fixed': Fixed format. Fast writing/reading. Not-appendable, 

2428 nor searchable. 

2429 - 'table': Table format. Write as a PyTables Table structure 

2430 which may perform worse but allow more flexible operations 

2431 like searching / selecting subsets of the data. 

2432 - If None, pd.get_option('io.hdf.default_format') is checked, 

2433 followed by fallback to "fixed" 

2434 errors : str, default 'strict' 

2435 Specifies how encoding and decoding errors are to be handled. 

2436 See the errors argument for :func:`open` for a full list 

2437 of options. 

2438 encoding : str, default "UTF-8" 

2439 min_itemsize : dict or int, optional 

2440 Map column names to minimum string sizes for columns. 

2441 nan_rep : Any, optional 

2442 How to represent null values as str. 

2443 Not allowed with append=True. 

2444 data_columns : list of columns or True, optional 

2445 List of columns to create as indexed data columns for on-disk 

2446 queries, or True to use all columns. By default only the axes 

2447 of the object are indexed. See :ref:`io.hdf5-query-data-columns`. 

2448 Applicable only to format='table'. 

2449 

2450 See Also 

2451 -------- 

2452 DataFrame.read_hdf : Read from HDF file. 

2453 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

2454 DataFrame.to_sql : Write to a sql table. 

2455 DataFrame.to_feather : Write out feather-format for DataFrames. 

2456 DataFrame.to_csv : Write out to a csv file. 

2457 

2458 Examples 

2459 -------- 

2460 >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, 

2461 ... index=['a', 'b', 'c']) 

2462 >>> df.to_hdf('data.h5', key='df', mode='w') 

2463 

2464 We can add another object to the same file: 

2465 

2466 >>> s = pd.Series([1, 2, 3, 4]) 

2467 >>> s.to_hdf('data.h5', key='s') 

2468 

2469 Reading from HDF file: 

2470 

2471 >>> pd.read_hdf('data.h5', 'df') 

2472 A B 

2473 a 1 4 

2474 b 2 5 

2475 c 3 6 

2476 >>> pd.read_hdf('data.h5', 's') 

2477 0 1 

2478 1 2 

2479 2 3 

2480 3 4 

2481 dtype: int64 

2482 

2483 Deleting file with data: 

2484 

2485 >>> import os 

2486 >>> os.remove('data.h5') 

2487 """ 

2488 from pandas.io import pytables 

2489 

2490 pytables.to_hdf( 

2491 path_or_buf, 

2492 key, 

2493 self, 

2494 mode=mode, 

2495 complevel=complevel, 

2496 complib=complib, 

2497 append=append, 

2498 format=format, 

2499 index=index, 

2500 min_itemsize=min_itemsize, 

2501 nan_rep=nan_rep, 

2502 dropna=dropna, 

2503 data_columns=data_columns, 

2504 errors=errors, 

2505 encoding=encoding, 

2506 ) 

2507 

2508 def to_sql( 

2509 self, 

2510 name: str, 

2511 con, 

2512 schema=None, 

2513 if_exists: str = "fail", 

2514 index: bool_t = True, 

2515 index_label=None, 

2516 chunksize=None, 

2517 dtype=None, 

2518 method=None, 

2519 ) -> None: 

2520 """ 

2521 Write records stored in a DataFrame to a SQL database. 

2522 

2523 Databases supported by SQLAlchemy [1]_ are supported. Tables can be 

2524 newly created, appended to, or overwritten. 

2525 

2526 Parameters 

2527 ---------- 

2528 name : str 

2529 Name of SQL table. 

2530 con : sqlalchemy.engine.Engine or sqlite3.Connection 

2531 Using SQLAlchemy makes it possible to use any DB supported by that 

2532 library. Legacy support is provided for sqlite3.Connection objects. The user 

2533 is responsible for engine disposal and connection closure for the SQLAlchemy 

2534 connectable See `here \ 

2535 <https://docs.sqlalchemy.org/en/13/core/connections.html>`_ 

2536 

2537 schema : str, optional 

2538 Specify the schema (if database flavor supports this). If None, use 

2539 default schema. 

2540 if_exists : {'fail', 'replace', 'append'}, default 'fail' 

2541 How to behave if the table already exists. 

2542 

2543 * fail: Raise a ValueError. 

2544 * replace: Drop the table before inserting new values. 

2545 * append: Insert new values to the existing table. 

2546 

2547 index : bool, default True 

2548 Write DataFrame index as a column. Uses `index_label` as the column 

2549 name in the table. 

2550 index_label : str or sequence, default None 

2551 Column label for index column(s). If None is given (default) and 

2552 `index` is True, then the index names are used. 

2553 A sequence should be given if the DataFrame uses MultiIndex. 

2554 chunksize : int, optional 

2555 Specify the number of rows in each batch to be written at a time. 

2556 By default, all rows will be written at once. 

2557 dtype : dict or scalar, optional 

2558 Specifying the datatype for columns. If a dictionary is used, the 

2559 keys should be the column names and the values should be the 

2560 SQLAlchemy types or strings for the sqlite3 legacy mode. If a 

2561 scalar is provided, it will be applied to all columns. 

2562 method : {None, 'multi', callable}, optional 

2563 Controls the SQL insertion clause used: 

2564 

2565 * None : Uses standard SQL ``INSERT`` clause (one per row). 

2566 * 'multi': Pass multiple values in a single ``INSERT`` clause. 

2567 * callable with signature ``(pd_table, conn, keys, data_iter)``. 

2568 

2569 Details and a sample callable implementation can be found in the 

2570 section :ref:`insert method <io.sql.method>`. 

2571 

2572 .. versionadded:: 0.24.0 

2573 

2574 Raises 

2575 ------ 

2576 ValueError 

2577 When the table already exists and `if_exists` is 'fail' (the 

2578 default). 

2579 

2580 See Also 

2581 -------- 

2582 read_sql : Read a DataFrame from a table. 

2583 

2584 Notes 

2585 ----- 

2586 Timezone aware datetime columns will be written as 

2587 ``Timestamp with timezone`` type with SQLAlchemy if supported by the 

2588 database. Otherwise, the datetimes will be stored as timezone unaware 

2589 timestamps local to the original timezone. 

2590 

2591 .. versionadded:: 0.24.0 

2592 

2593 References 

2594 ---------- 

2595 .. [1] http://docs.sqlalchemy.org 

2596 .. [2] https://www.python.org/dev/peps/pep-0249/ 

2597 

2598 Examples 

2599 -------- 

2600 

2601 Create an in-memory SQLite database. 

2602 

2603 >>> from sqlalchemy import create_engine 

2604 >>> engine = create_engine('sqlite://', echo=False) 

2605 

2606 Create a table from scratch with 3 rows. 

2607 

2608 >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']}) 

2609 >>> df 

2610 name 

2611 0 User 1 

2612 1 User 2 

2613 2 User 3 

2614 

2615 >>> df.to_sql('users', con=engine) 

2616 >>> engine.execute("SELECT * FROM users").fetchall() 

2617 [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')] 

2618 

2619 >>> df1 = pd.DataFrame({'name' : ['User 4', 'User 5']}) 

2620 >>> df1.to_sql('users', con=engine, if_exists='append') 

2621 >>> engine.execute("SELECT * FROM users").fetchall() 

2622 [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'), 

2623 (0, 'User 4'), (1, 'User 5')] 

2624 

2625 Overwrite the table with just ``df1``. 

2626 

2627 >>> df1.to_sql('users', con=engine, if_exists='replace', 

2628 ... index_label='id') 

2629 >>> engine.execute("SELECT * FROM users").fetchall() 

2630 [(0, 'User 4'), (1, 'User 5')] 

2631 

2632 Specify the dtype (especially useful for integers with missing values). 

2633 Notice that while pandas is forced to store the data as floating point, 

2634 the database supports nullable integers. When fetching the data with 

2635 Python, we get back integer scalars. 

2636 

2637 >>> df = pd.DataFrame({"A": [1, None, 2]}) 

2638 >>> df 

2639 A 

2640 0 1.0 

2641 1 NaN 

2642 2 2.0 

2643 

2644 >>> from sqlalchemy.types import Integer 

2645 >>> df.to_sql('integers', con=engine, index=False, 

2646 ... dtype={"A": Integer()}) 

2647 

2648 >>> engine.execute("SELECT * FROM integers").fetchall() 

2649 [(1,), (None,), (2,)] 

2650 """ 

2651 from pandas.io import sql 

2652 

2653 sql.to_sql( 

2654 self, 

2655 name, 

2656 con, 

2657 schema=schema, 

2658 if_exists=if_exists, 

2659 index=index, 

2660 index_label=index_label, 

2661 chunksize=chunksize, 

2662 dtype=dtype, 

2663 method=method, 

2664 ) 

2665 

2666 def to_pickle( 

2667 self, 

2668 path, 

2669 compression: Optional[str] = "infer", 

2670 protocol: int = pickle.HIGHEST_PROTOCOL, 

2671 ) -> None: 

2672 """ 

2673 Pickle (serialize) object to file. 

2674 

2675 Parameters 

2676 ---------- 

2677 path : str 

2678 File path where the pickled object will be stored. 

2679 compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, \ 

2680 default 'infer' 

2681 A string representing the compression to use in the output file. By 

2682 default, infers from the file extension in specified path. 

2683 protocol : int 

2684 Int which indicates which protocol should be used by the pickler, 

2685 default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible 

2686 values are 0, 1, 2, 3, 4. A negative value for the protocol 

2687 parameter is equivalent to setting its value to HIGHEST_PROTOCOL. 

2688 

2689 .. [1] https://docs.python.org/3/library/pickle.html. 

2690 .. versionadded:: 0.21.0. 

2691 

2692 See Also 

2693 -------- 

2694 read_pickle : Load pickled pandas object (or any object) from file. 

2695 DataFrame.to_hdf : Write DataFrame to an HDF5 file. 

2696 DataFrame.to_sql : Write DataFrame to a SQL database. 

2697 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

2698 

2699 Examples 

2700 -------- 

2701 >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)}) 

2702 >>> original_df 

2703 foo bar 

2704 0 0 5 

2705 1 1 6 

2706 2 2 7 

2707 3 3 8 

2708 4 4 9 

2709 >>> original_df.to_pickle("./dummy.pkl") 

2710 

2711 >>> unpickled_df = pd.read_pickle("./dummy.pkl") 

2712 >>> unpickled_df 

2713 foo bar 

2714 0 0 5 

2715 1 1 6 

2716 2 2 7 

2717 3 3 8 

2718 4 4 9 

2719 

2720 >>> import os 

2721 >>> os.remove("./dummy.pkl") 

2722 """ 

2723 from pandas.io.pickle import to_pickle 

2724 

2725 to_pickle(self, path, compression=compression, protocol=protocol) 

2726 

2727 def to_clipboard( 

2728 self, excel: bool_t = True, sep: Optional[str] = None, **kwargs 

2729 ) -> None: 

2730 r""" 

2731 Copy object to the system clipboard. 

2732 

2733 Write a text representation of object to the system clipboard. 

2734 This can be pasted into Excel, for example. 

2735 

2736 Parameters 

2737 ---------- 

2738 excel : bool, default True 

2739 Produce output in a csv format for easy pasting into excel. 

2740 

2741 - True, use the provided separator for csv pasting. 

2742 - False, write a string representation of the object to the clipboard. 

2743 

2744 sep : str, default ``'\t'`` 

2745 Field delimiter. 

2746 **kwargs 

2747 These parameters will be passed to DataFrame.to_csv. 

2748 

2749 See Also 

2750 -------- 

2751 DataFrame.to_csv : Write a DataFrame to a comma-separated values 

2752 (csv) file. 

2753 read_clipboard : Read text from clipboard and pass to read_table. 

2754 

2755 Notes 

2756 ----- 

2757 Requirements for your platform. 

2758 

2759 - Linux : `xclip`, or `xsel` (with `PyQt4` modules) 

2760 - Windows : none 

2761 - OS X : none 

2762 

2763 Examples 

2764 -------- 

2765 Copy the contents of a DataFrame to the clipboard. 

2766 

2767 >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) 

2768 >>> df.to_clipboard(sep=',') 

2769 ... # Wrote the following to the system clipboard: 

2770 ... # ,A,B,C 

2771 ... # 0,1,2,3 

2772 ... # 1,4,5,6 

2773 

2774 We can omit the the index by passing the keyword `index` and setting 

2775 it to false. 

2776 

2777 >>> df.to_clipboard(sep=',', index=False) 

2778 ... # Wrote the following to the system clipboard: 

2779 ... # A,B,C 

2780 ... # 1,2,3 

2781 ... # 4,5,6 

2782 """ 

2783 from pandas.io import clipboards 

2784 

2785 clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs) 

2786 

2787 def to_xarray(self): 

2788 """ 

2789 Return an xarray object from the pandas object. 

2790 

2791 Returns 

2792 ------- 

2793 xarray.DataArray or xarray.Dataset 

2794 Data in the pandas structure converted to Dataset if the object is 

2795 a DataFrame, or a DataArray if the object is a Series. 

2796 

2797 See Also 

2798 -------- 

2799 DataFrame.to_hdf : Write DataFrame to an HDF5 file. 

2800 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

2801 

2802 Notes 

2803 ----- 

2804 See the `xarray docs <http://xarray.pydata.org/en/stable/>`__ 

2805 

2806 Examples 

2807 -------- 

2808 >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2), 

2809 ... ('parrot', 'bird', 24.0, 2), 

2810 ... ('lion', 'mammal', 80.5, 4), 

2811 ... ('monkey', 'mammal', np.nan, 4)], 

2812 ... columns=['name', 'class', 'max_speed', 

2813 ... 'num_legs']) 

2814 >>> df 

2815 name class max_speed num_legs 

2816 0 falcon bird 389.0 2 

2817 1 parrot bird 24.0 2 

2818 2 lion mammal 80.5 4 

2819 3 monkey mammal NaN 4 

2820 

2821 >>> df.to_xarray() 

2822 <xarray.Dataset> 

2823 Dimensions: (index: 4) 

2824 Coordinates: 

2825 * index (index) int64 0 1 2 3 

2826 Data variables: 

2827 name (index) object 'falcon' 'parrot' 'lion' 'monkey' 

2828 class (index) object 'bird' 'bird' 'mammal' 'mammal' 

2829 max_speed (index) float64 389.0 24.0 80.5 nan 

2830 num_legs (index) int64 2 2 4 4 

2831 

2832 >>> df['max_speed'].to_xarray() 

2833 <xarray.DataArray 'max_speed' (index: 4)> 

2834 array([389. , 24. , 80.5, nan]) 

2835 Coordinates: 

2836 * index (index) int64 0 1 2 3 

2837 

2838 >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01', 

2839 ... '2018-01-02', '2018-01-02']) 

2840 >>> df_multiindex = pd.DataFrame({'date': dates, 

2841 ... 'animal': ['falcon', 'parrot', 

2842 ... 'falcon', 'parrot'], 

2843 ... 'speed': [350, 18, 361, 15]}) 

2844 >>> df_multiindex = df_multiindex.set_index(['date', 'animal']) 

2845 

2846 >>> df_multiindex 

2847 speed 

2848 date animal 

2849 2018-01-01 falcon 350 

2850 parrot 18 

2851 2018-01-02 falcon 361 

2852 parrot 15 

2853 

2854 >>> df_multiindex.to_xarray() 

2855 <xarray.Dataset> 

2856 Dimensions: (animal: 2, date: 2) 

2857 Coordinates: 

2858 * date (date) datetime64[ns] 2018-01-01 2018-01-02 

2859 * animal (animal) object 'falcon' 'parrot' 

2860 Data variables: 

2861 speed (date, animal) int64 350 18 361 15 

2862 """ 

2863 xarray = import_optional_dependency("xarray") 

2864 

2865 if self.ndim == 1: 

2866 return xarray.DataArray.from_series(self) 

2867 else: 

2868 return xarray.Dataset.from_dataframe(self) 

2869 

2870 @Substitution(returns=fmt.return_docstring) 

2871 def to_latex( 

2872 self, 

2873 buf=None, 

2874 columns=None, 

2875 col_space=None, 

2876 header=True, 

2877 index=True, 

2878 na_rep="NaN", 

2879 formatters=None, 

2880 float_format=None, 

2881 sparsify=None, 

2882 index_names=True, 

2883 bold_rows=False, 

2884 column_format=None, 

2885 longtable=None, 

2886 escape=None, 

2887 encoding=None, 

2888 decimal=".", 

2889 multicolumn=None, 

2890 multicolumn_format=None, 

2891 multirow=None, 

2892 caption=None, 

2893 label=None, 

2894 ): 

2895 r""" 

2896 Render object to a LaTeX tabular, longtable, or nested table/tabular. 

2897 

2898 Requires ``\usepackage{booktabs}``. The output can be copy/pasted 

2899 into a main LaTeX document or read from an external file 

2900 with ``\input{table.tex}``. 

2901 

2902 .. versionchanged:: 0.20.2 

2903 Added to Series. 

2904 

2905 .. versionchanged:: 1.0.0 

2906 Added caption and label arguments. 

2907 

2908 Parameters 

2909 ---------- 

2910 buf : str, Path or StringIO-like, optional, default None 

2911 Buffer to write to. If None, the output is returned as a string. 

2912 columns : list of label, optional 

2913 The subset of columns to write. Writes all columns by default. 

2914 col_space : int, optional 

2915 The minimum width of each column. 

2916 header : bool or list of str, default True 

2917 Write out the column names. If a list of strings is given, 

2918 it is assumed to be aliases for the column names. 

2919 index : bool, default True 

2920 Write row names (index). 

2921 na_rep : str, default 'NaN' 

2922 Missing data representation. 

2923 formatters : list of functions or dict of {str: function}, optional 

2924 Formatter functions to apply to columns' elements by position or 

2925 name. The result of each function must be a unicode string. 

2926 List must be of length equal to the number of columns. 

2927 float_format : one-parameter function or str, optional, default None 

2928 Formatter for floating point numbers. For example 

2929 ``float_format="%%.2f"`` and ``float_format="{:0.2f}".format`` will 

2930 both result in 0.1234 being formatted as 0.12. 

2931 sparsify : bool, optional 

2932 Set to False for a DataFrame with a hierarchical index to print 

2933 every multiindex key at each row. By default, the value will be 

2934 read from the config module. 

2935 index_names : bool, default True 

2936 Prints the names of the indexes. 

2937 bold_rows : bool, default False 

2938 Make the row labels bold in the output. 

2939 column_format : str, optional 

2940 The columns format as specified in `LaTeX table format 

2941 <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3 

2942 columns. By default, 'l' will be used for all columns except 

2943 columns of numbers, which default to 'r'. 

2944 longtable : bool, optional 

2945 By default, the value will be read from the pandas config 

2946 module. Use a longtable environment instead of tabular. Requires 

2947 adding a \usepackage{longtable} to your LaTeX preamble. 

2948 escape : bool, optional 

2949 By default, the value will be read from the pandas config 

2950 module. When set to False prevents from escaping latex special 

2951 characters in column names. 

2952 encoding : str, optional 

2953 A string representing the encoding to use in the output file, 

2954 defaults to 'utf-8'. 

2955 decimal : str, default '.' 

2956 Character recognized as decimal separator, e.g. ',' in Europe. 

2957 multicolumn : bool, default True 

2958 Use \multicolumn to enhance MultiIndex columns. 

2959 The default will be read from the config module. 

2960 multicolumn_format : str, default 'l' 

2961 The alignment for multicolumns, similar to `column_format` 

2962 The default will be read from the config module. 

2963 multirow : bool, default False 

2964 Use \multirow to enhance MultiIndex rows. Requires adding a 

2965 \usepackage{multirow} to your LaTeX preamble. Will print 

2966 centered labels (instead of top-aligned) across the contained 

2967 rows, separating groups via clines. The default will be read 

2968 from the pandas config module. 

2969 caption : str, optional 

2970 The LaTeX caption to be placed inside ``\caption{}`` in the output. 

2971 

2972 .. versionadded:: 1.0.0 

2973 

2974 label : str, optional 

2975 The LaTeX label to be placed inside ``\label{}`` in the output. 

2976 This is used with ``\ref{}`` in the main ``.tex`` file. 

2977 

2978 .. versionadded:: 1.0.0 

2979 %(returns)s 

2980 See Also 

2981 -------- 

2982 DataFrame.to_string : Render a DataFrame to a console-friendly 

2983 tabular output. 

2984 DataFrame.to_html : Render a DataFrame as an HTML table. 

2985 

2986 Examples 

2987 -------- 

2988 >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'], 

2989 ... 'mask': ['red', 'purple'], 

2990 ... 'weapon': ['sai', 'bo staff']}) 

2991 >>> print(df.to_latex(index=False)) # doctest: +NORMALIZE_WHITESPACE 

2992 \begin{tabular}{lll} 

2993 \toprule 

2994 name & mask & weapon \\ 

2995 \midrule 

2996 Raphael & red & sai \\ 

2997 Donatello & purple & bo staff \\ 

2998 \bottomrule 

2999 \end{tabular} 

3000 """ 

3001 # Get defaults from the pandas config 

3002 if self.ndim == 1: 

3003 self = self.to_frame() 

3004 if longtable is None: 

3005 longtable = config.get_option("display.latex.longtable") 

3006 if escape is None: 

3007 escape = config.get_option("display.latex.escape") 

3008 if multicolumn is None: 

3009 multicolumn = config.get_option("display.latex.multicolumn") 

3010 if multicolumn_format is None: 

3011 multicolumn_format = config.get_option("display.latex.multicolumn_format") 

3012 if multirow is None: 

3013 multirow = config.get_option("display.latex.multirow") 

3014 

3015 formatter = DataFrameFormatter( 

3016 self, 

3017 columns=columns, 

3018 col_space=col_space, 

3019 na_rep=na_rep, 

3020 header=header, 

3021 index=index, 

3022 formatters=formatters, 

3023 float_format=float_format, 

3024 bold_rows=bold_rows, 

3025 sparsify=sparsify, 

3026 index_names=index_names, 

3027 escape=escape, 

3028 decimal=decimal, 

3029 ) 

3030 return formatter.to_latex( 

3031 buf=buf, 

3032 column_format=column_format, 

3033 longtable=longtable, 

3034 encoding=encoding, 

3035 multicolumn=multicolumn, 

3036 multicolumn_format=multicolumn_format, 

3037 multirow=multirow, 

3038 caption=caption, 

3039 label=label, 

3040 ) 

3041 

3042 def to_csv( 

3043 self, 

3044 path_or_buf: Optional[FilePathOrBuffer] = None, 

3045 sep: str = ",", 

3046 na_rep: str = "", 

3047 float_format: Optional[str] = None, 

3048 columns: Optional[Sequence[Optional[Hashable]]] = None, 

3049 header: Union[bool_t, List[str]] = True, 

3050 index: bool_t = True, 

3051 index_label: Optional[Union[bool_t, str, Sequence[Optional[Hashable]]]] = None, 

3052 mode: str = "w", 

3053 encoding: Optional[str] = None, 

3054 compression: Optional[Union[str, Mapping[str, str]]] = "infer", 

3055 quoting: Optional[int] = None, 

3056 quotechar: str = '"', 

3057 line_terminator: Optional[str] = None, 

3058 chunksize: Optional[int] = None, 

3059 date_format: Optional[str] = None, 

3060 doublequote: bool_t = True, 

3061 escapechar: Optional[str] = None, 

3062 decimal: Optional[str] = ".", 

3063 ) -> Optional[str]: 

3064 r""" 

3065 Write object to a comma-separated values (csv) file. 

3066 

3067 .. versionchanged:: 0.24.0 

3068 The order of arguments for Series was changed. 

3069 

3070 Parameters 

3071 ---------- 

3072 path_or_buf : str or file handle, default None 

3073 File path or object, if None is provided the result is returned as 

3074 a string. If a file object is passed it should be opened with 

3075 `newline=''`, disabling universal newlines. 

3076 

3077 .. versionchanged:: 0.24.0 

3078 

3079 Was previously named "path" for Series. 

3080 

3081 sep : str, default ',' 

3082 String of length 1. Field delimiter for the output file. 

3083 na_rep : str, default '' 

3084 Missing data representation. 

3085 float_format : str, default None 

3086 Format string for floating point numbers. 

3087 columns : sequence, optional 

3088 Columns to write. 

3089 header : bool or list of str, default True 

3090 Write out the column names. If a list of strings is given it is 

3091 assumed to be aliases for the column names. 

3092 

3093 .. versionchanged:: 0.24.0 

3094 

3095 Previously defaulted to False for Series. 

3096 

3097 index : bool, default True 

3098 Write row names (index). 

3099 index_label : str or sequence, or False, default None 

3100 Column label for index column(s) if desired. If None is given, and 

3101 `header` and `index` are True, then the index names are used. A 

3102 sequence should be given if the object uses MultiIndex. If 

3103 False do not print fields for index names. Use index_label=False 

3104 for easier importing in R. 

3105 mode : str 

3106 Python write mode, default 'w'. 

3107 encoding : str, optional 

3108 A string representing the encoding to use in the output file, 

3109 defaults to 'utf-8'. 

3110 compression : str or dict, default 'infer' 

3111 If str, represents compression mode. If dict, value at 'method' is 

3112 the compression mode. Compression mode may be any of the following 

3113 possible values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. If 

3114 compression mode is 'infer' and `path_or_buf` is path-like, then 

3115 detect compression mode from the following extensions: '.gz', 

3116 '.bz2', '.zip' or '.xz'. (otherwise no compression). If dict given 

3117 and mode is 'zip' or inferred as 'zip', other entries passed as 

3118 additional compression options. 

3119 

3120 .. versionchanged:: 1.0.0 

3121 

3122 May now be a dict with key 'method' as compression mode 

3123 and other entries as additional compression options if 

3124 compression mode is 'zip'. 

3125 

3126 quoting : optional constant from csv module 

3127 Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` 

3128 then floats are converted to strings and thus csv.QUOTE_NONNUMERIC 

3129 will treat them as non-numeric. 

3130 quotechar : str, default '\"' 

3131 String of length 1. Character used to quote fields. 

3132 line_terminator : str, optional 

3133 The newline character or character sequence to use in the output 

3134 file. Defaults to `os.linesep`, which depends on the OS in which 

3135 this method is called ('\n' for linux, '\r\n' for Windows, i.e.). 

3136 

3137 .. versionchanged:: 0.24.0 

3138 chunksize : int or None 

3139 Rows to write at a time. 

3140 date_format : str, default None 

3141 Format string for datetime objects. 

3142 doublequote : bool, default True 

3143 Control quoting of `quotechar` inside a field. 

3144 escapechar : str, default None 

3145 String of length 1. Character used to escape `sep` and `quotechar` 

3146 when appropriate. 

3147 decimal : str, default '.' 

3148 Character recognized as decimal separator. E.g. use ',' for 

3149 European data. 

3150 

3151 Returns 

3152 ------- 

3153 None or str 

3154 If path_or_buf is None, returns the resulting csv format as a 

3155 string. Otherwise returns None. 

3156 

3157 See Also 

3158 -------- 

3159 read_csv : Load a CSV file into a DataFrame. 

3160 to_excel : Write DataFrame to an Excel file. 

3161 

3162 Examples 

3163 -------- 

3164 >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'], 

3165 ... 'mask': ['red', 'purple'], 

3166 ... 'weapon': ['sai', 'bo staff']}) 

3167 >>> df.to_csv(index=False) 

3168 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n' 

3169 

3170 Create 'out.zip' containing 'out.csv' 

3171 

3172 >>> compression_opts = dict(method='zip', 

3173 ... archive_name='out.csv') # doctest: +SKIP 

3174 >>> df.to_csv('out.zip', index=False, 

3175 ... compression=compression_opts) # doctest: +SKIP 

3176 """ 

3177 

3178 df = self if isinstance(self, ABCDataFrame) else self.to_frame() 

3179 

3180 from pandas.io.formats.csvs import CSVFormatter 

3181 

3182 formatter = CSVFormatter( 

3183 df, 

3184 path_or_buf, 

3185 line_terminator=line_terminator, 

3186 sep=sep, 

3187 encoding=encoding, 

3188 compression=compression, 

3189 quoting=quoting, 

3190 na_rep=na_rep, 

3191 float_format=float_format, 

3192 cols=columns, 

3193 header=header, 

3194 index=index, 

3195 index_label=index_label, 

3196 mode=mode, 

3197 chunksize=chunksize, 

3198 quotechar=quotechar, 

3199 date_format=date_format, 

3200 doublequote=doublequote, 

3201 escapechar=escapechar, 

3202 decimal=decimal, 

3203 ) 

3204 formatter.save() 

3205 

3206 if path_or_buf is None: 

3207 return formatter.path_or_buf.getvalue() 

3208 

3209 return None 

3210 

3211 # ---------------------------------------------------------------------- 

3212 # Fancy Indexing 

3213 

3214 @classmethod 

3215 def _create_indexer(cls, name: str, indexer) -> None: 

3216 """Create an indexer like _name in the class. 

3217 

3218 Kept for compatibility with geopandas. To be removed in the future. See GH27258 

3219 """ 

3220 if getattr(cls, name, None) is None: 

3221 _indexer = functools.partial(indexer, name) 

3222 setattr(cls, name, property(_indexer, doc=indexer.__doc__)) 

3223 

3224 # ---------------------------------------------------------------------- 

3225 # Lookup Caching 

3226 

3227 def _set_as_cached(self, item, cacher) -> None: 

3228 """Set the _cacher attribute on the calling object with a weakref to 

3229 cacher. 

3230 """ 

3231 self._cacher = (item, weakref.ref(cacher)) 

3232 

3233 def _reset_cacher(self) -> None: 

3234 """Reset the cacher.""" 

3235 if hasattr(self, "_cacher"): 

3236 del self._cacher 

3237 

3238 def _maybe_cache_changed(self, item, value) -> None: 

3239 """The object has called back to us saying maybe it has changed. 

3240 """ 

3241 self._data.set(item, value) 

3242 

3243 @property 

3244 def _is_cached(self) -> bool_t: 

3245 """Return boolean indicating if self is cached or not.""" 

3246 return getattr(self, "_cacher", None) is not None 

3247 

3248 def _get_cacher(self): 

3249 """return my cacher or None""" 

3250 cacher = getattr(self, "_cacher", None) 

3251 if cacher is not None: 

3252 cacher = cacher[1]() 

3253 return cacher 

3254 

3255 def _maybe_update_cacher( 

3256 self, clear: bool_t = False, verify_is_copy: bool_t = True 

3257 ) -> None: 

3258 """ 

3259 See if we need to update our parent cacher if clear, then clear our 

3260 cache. 

3261 

3262 Parameters 

3263 ---------- 

3264 clear : bool, default False 

3265 Clear the item cache. 

3266 verify_is_copy : bool, default True 

3267 Provide is_copy checks. 

3268 """ 

3269 

3270 cacher = getattr(self, "_cacher", None) 

3271 if cacher is not None: 

3272 ref = cacher[1]() 

3273 

3274 # we are trying to reference a dead referant, hence 

3275 # a copy 

3276 if ref is None: 

3277 del self._cacher 

3278 else: 

3279 # Note: we need to call ref._maybe_cache_changed even in the 

3280 # case where it will raise. (Uh, not clear why) 

3281 try: 

3282 ref._maybe_cache_changed(cacher[0], self) 

3283 except AssertionError: 

3284 # ref._data.setitem can raise 

3285 # AssertionError because of shape mismatch 

3286 pass 

3287 

3288 if verify_is_copy: 

3289 self._check_setitem_copy(stacklevel=5, t="referant") 

3290 

3291 if clear: 

3292 self._clear_item_cache() 

3293 

3294 def _clear_item_cache(self) -> None: 

3295 self._item_cache.clear() 

3296 

3297 # ---------------------------------------------------------------------- 

3298 # Indexing Methods 

3299 

3300 def take( 

3301 self: FrameOrSeries, indices, axis=0, is_copy: Optional[bool_t] = None, **kwargs 

3302 ) -> FrameOrSeries: 

3303 """ 

3304 Return the elements in the given *positional* indices along an axis. 

3305 

3306 This means that we are not indexing according to actual values in 

3307 the index attribute of the object. We are indexing according to the 

3308 actual position of the element in the object. 

3309 

3310 Parameters 

3311 ---------- 

3312 indices : array-like 

3313 An array of ints indicating which positions to take. 

3314 axis : {0 or 'index', 1 or 'columns', None}, default 0 

3315 The axis on which to select elements. ``0`` means that we are 

3316 selecting rows, ``1`` means that we are selecting columns. 

3317 is_copy : bool 

3318 Before pandas 1.0, ``is_copy=False`` can be specified to ensure 

3319 that the return value is an actual copy. Starting with pandas 1.0, 

3320 ``take`` always returns a copy, and the keyword is therefore 

3321 deprecated. 

3322 

3323 .. deprecated:: 1.0.0 

3324 **kwargs 

3325 For compatibility with :meth:`numpy.take`. Has no effect on the 

3326 output. 

3327 

3328 Returns 

3329 ------- 

3330 taken : same type as caller 

3331 An array-like containing the elements taken from the object. 

3332 

3333 See Also 

3334 -------- 

3335 DataFrame.loc : Select a subset of a DataFrame by labels. 

3336 DataFrame.iloc : Select a subset of a DataFrame by positions. 

3337 numpy.take : Take elements from an array along an axis. 

3338 

3339 Examples 

3340 -------- 

3341 >>> df = pd.DataFrame([('falcon', 'bird', 389.0), 

3342 ... ('parrot', 'bird', 24.0), 

3343 ... ('lion', 'mammal', 80.5), 

3344 ... ('monkey', 'mammal', np.nan)], 

3345 ... columns=['name', 'class', 'max_speed'], 

3346 ... index=[0, 2, 3, 1]) 

3347 >>> df 

3348 name class max_speed 

3349 0 falcon bird 389.0 

3350 2 parrot bird 24.0 

3351 3 lion mammal 80.5 

3352 1 monkey mammal NaN 

3353 

3354 Take elements at positions 0 and 3 along the axis 0 (default). 

3355 

3356 Note how the actual indices selected (0 and 1) do not correspond to 

3357 our selected indices 0 and 3. That's because we are selecting the 0th 

3358 and 3rd rows, not rows whose indices equal 0 and 3. 

3359 

3360 >>> df.take([0, 3]) 

3361 name class max_speed 

3362 0 falcon bird 389.0 

3363 1 monkey mammal NaN 

3364 

3365 Take elements at indices 1 and 2 along the axis 1 (column selection). 

3366 

3367 >>> df.take([1, 2], axis=1) 

3368 class max_speed 

3369 0 bird 389.0 

3370 2 bird 24.0 

3371 3 mammal 80.5 

3372 1 mammal NaN 

3373 

3374 We may take elements using negative integers for positive indices, 

3375 starting from the end of the object, just like with Python lists. 

3376 

3377 >>> df.take([-1, -2]) 

3378 name class max_speed 

3379 1 monkey mammal NaN 

3380 3 lion mammal 80.5 

3381 """ 

3382 if is_copy is not None: 

3383 warnings.warn( 

3384 "is_copy is deprecated and will be removed in a future version. " 

3385 "'take' always returns a copy, so there is no need to specify this.", 

3386 FutureWarning, 

3387 stacklevel=2, 

3388 ) 

3389 

3390 nv.validate_take(tuple(), kwargs) 

3391 

3392 self._consolidate_inplace() 

3393 

3394 new_data = self._data.take( 

3395 indices, axis=self._get_block_manager_axis(axis), verify=True 

3396 ) 

3397 return self._constructor(new_data).__finalize__(self) 

3398 

3399 def _take_with_is_copy( 

3400 self: FrameOrSeries, indices, axis=0, **kwargs 

3401 ) -> FrameOrSeries: 

3402 """ 

3403 Internal version of the `take` method that sets the `_is_copy` 

3404 attribute to keep track of the parent dataframe (using in indexing 

3405 for the SettingWithCopyWarning). 

3406 

3407 See the docstring of `take` for full explanation of the parameters. 

3408 """ 

3409 result = self.take(indices=indices, axis=axis, **kwargs) 

3410 # Maybe set copy if we didn't actually change the index. 

3411 if not result._get_axis(axis).equals(self._get_axis(axis)): 

3412 result._set_is_copy(self) 

3413 return result 

3414 

3415 def xs(self, key, axis=0, level=None, drop_level: bool_t = True): 

3416 """ 

3417 Return cross-section from the Series/DataFrame. 

3418 

3419 This method takes a `key` argument to select data at a particular 

3420 level of a MultiIndex. 

3421 

3422 Parameters 

3423 ---------- 

3424 key : label or tuple of label 

3425 Label contained in the index, or partially in a MultiIndex. 

3426 axis : {0 or 'index', 1 or 'columns'}, default 0 

3427 Axis to retrieve cross-section on. 

3428 level : object, defaults to first n levels (n=1 or len(key)) 

3429 In case of a key partially contained in a MultiIndex, indicate 

3430 which levels are used. Levels can be referred by label or position. 

3431 drop_level : bool, default True 

3432 If False, returns object with same levels as self. 

3433 

3434 Returns 

3435 ------- 

3436 Series or DataFrame 

3437 Cross-section from the original Series or DataFrame 

3438 corresponding to the selected index levels. 

3439 

3440 See Also 

3441 -------- 

3442 DataFrame.loc : Access a group of rows and columns 

3443 by label(s) or a boolean array. 

3444 DataFrame.iloc : Purely integer-location based indexing 

3445 for selection by position. 

3446 

3447 Notes 

3448 ----- 

3449 `xs` can not be used to set values. 

3450 

3451 MultiIndex Slicers is a generic way to get/set values on 

3452 any level or levels. 

3453 It is a superset of `xs` functionality, see 

3454 :ref:`MultiIndex Slicers <advanced.mi_slicers>`. 

3455 

3456 Examples 

3457 -------- 

3458 >>> d = {'num_legs': [4, 4, 2, 2], 

3459 ... 'num_wings': [0, 0, 2, 2], 

3460 ... 'class': ['mammal', 'mammal', 'mammal', 'bird'], 

3461 ... 'animal': ['cat', 'dog', 'bat', 'penguin'], 

3462 ... 'locomotion': ['walks', 'walks', 'flies', 'walks']} 

3463 >>> df = pd.DataFrame(data=d) 

3464 >>> df = df.set_index(['class', 'animal', 'locomotion']) 

3465 >>> df 

3466 num_legs num_wings 

3467 class animal locomotion 

3468 mammal cat walks 4 0 

3469 dog walks 4 0 

3470 bat flies 2 2 

3471 bird penguin walks 2 2 

3472 

3473 Get values at specified index 

3474 

3475 >>> df.xs('mammal') 

3476 num_legs num_wings 

3477 animal locomotion 

3478 cat walks 4 0 

3479 dog walks 4 0 

3480 bat flies 2 2 

3481 

3482 Get values at several indexes 

3483 

3484 >>> df.xs(('mammal', 'dog')) 

3485 num_legs num_wings 

3486 locomotion 

3487 walks 4 0 

3488 

3489 Get values at specified index and level 

3490 

3491 >>> df.xs('cat', level=1) 

3492 num_legs num_wings 

3493 class locomotion 

3494 mammal walks 4 0 

3495 

3496 Get values at several indexes and levels 

3497 

3498 >>> df.xs(('bird', 'walks'), 

3499 ... level=[0, 'locomotion']) 

3500 num_legs num_wings 

3501 animal 

3502 penguin 2 2 

3503 

3504 Get values at specified column and axis 

3505 

3506 >>> df.xs('num_wings', axis=1) 

3507 class animal locomotion 

3508 mammal cat walks 0 

3509 dog walks 0 

3510 bat flies 2 

3511 bird penguin walks 2 

3512 Name: num_wings, dtype: int64 

3513 """ 

3514 axis = self._get_axis_number(axis) 

3515 labels = self._get_axis(axis) 

3516 if level is not None: 

3517 loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level) 

3518 

3519 # create the tuple of the indexer 

3520 _indexer = [slice(None)] * self.ndim 

3521 _indexer[axis] = loc 

3522 indexer = tuple(_indexer) 

3523 

3524 result = self.iloc[indexer] 

3525 setattr(result, result._get_axis_name(axis), new_ax) 

3526 return result 

3527 

3528 if axis == 1: 

3529 return self[key] 

3530 

3531 self._consolidate_inplace() 

3532 

3533 index = self.index 

3534 if isinstance(index, MultiIndex): 

3535 loc, new_index = self.index.get_loc_level(key, drop_level=drop_level) 

3536 else: 

3537 loc = self.index.get_loc(key) 

3538 

3539 if isinstance(loc, np.ndarray): 

3540 if loc.dtype == np.bool_: 

3541 (inds,) = loc.nonzero() 

3542 return self._take_with_is_copy(inds, axis=axis) 

3543 else: 

3544 return self._take_with_is_copy(loc, axis=axis) 

3545 

3546 if not is_scalar(loc): 

3547 new_index = self.index[loc] 

3548 

3549 if is_scalar(loc): 

3550 new_values = self._data.fast_xs(loc) 

3551 

3552 # may need to box a datelike-scalar 

3553 # 

3554 # if we encounter an array-like and we only have 1 dim 

3555 # that means that their are list/ndarrays inside the Series! 

3556 # so just return them (GH 6394) 

3557 if not is_list_like(new_values) or self.ndim == 1: 

3558 return com.maybe_box_datetimelike(new_values) 

3559 

3560 result = self._constructor_sliced( 

3561 new_values, 

3562 index=self.columns, 

3563 name=self.index[loc], 

3564 dtype=new_values.dtype, 

3565 ) 

3566 

3567 else: 

3568 result = self.iloc[loc] 

3569 result.index = new_index 

3570 

3571 # this could be a view 

3572 # but only in a single-dtyped view sliceable case 

3573 result._set_is_copy(self, copy=not result._is_view) 

3574 return result 

3575 

3576 _xs: Callable = xs 

3577 

3578 def __getitem__(self, item): 

3579 raise AbstractMethodError(self) 

3580 

3581 def _get_item_cache(self, item): 

3582 """Return the cached item, item represents a label indexer.""" 

3583 cache = self._item_cache 

3584 res = cache.get(item) 

3585 if res is None: 

3586 values = self._data.get(item) 

3587 res = self._box_item_values(item, values) 

3588 cache[item] = res 

3589 res._set_as_cached(item, self) 

3590 

3591 # for a chain 

3592 res._is_copy = self._is_copy 

3593 return res 

3594 

3595 def _iget_item_cache(self, item): 

3596 """Return the cached item, item represents a positional indexer.""" 

3597 ax = self._info_axis 

3598 if ax.is_unique: 

3599 lower = self._get_item_cache(ax[item]) 

3600 else: 

3601 lower = self._take_with_is_copy(item, axis=self._info_axis_number) 

3602 return lower 

3603 

3604 def _box_item_values(self, key, values): 

3605 raise AbstractMethodError(self) 

3606 

3607 def _slice(self: FrameOrSeries, slobj: slice, axis=0, kind=None) -> FrameOrSeries: 

3608 """ 

3609 Construct a slice of this container. 

3610 

3611 kind parameter is maintained for compatibility with Series slicing. 

3612 """ 

3613 axis = self._get_block_manager_axis(axis) 

3614 result = self._constructor(self._data.get_slice(slobj, axis=axis)) 

3615 result = result.__finalize__(self) 

3616 

3617 # this could be a view 

3618 # but only in a single-dtyped view sliceable case 

3619 is_copy = axis != 0 or result._is_view 

3620 result._set_is_copy(self, copy=is_copy) 

3621 return result 

3622 

3623 def _set_item(self, key, value) -> None: 

3624 self._data.set(key, value) 

3625 self._clear_item_cache() 

3626 

3627 def _set_is_copy(self, ref=None, copy: bool_t = True) -> None: 

3628 if not copy: 

3629 self._is_copy = None 

3630 else: 

3631 if ref is not None: 

3632 self._is_copy = weakref.ref(ref) 

3633 else: 

3634 self._is_copy = None 

3635 

3636 def _check_is_chained_assignment_possible(self) -> bool_t: 

3637 """ 

3638 Check if we are a view, have a cacher, and are of mixed type. 

3639 If so, then force a setitem_copy check. 

3640 

3641 Should be called just near setting a value 

3642 

3643 Will return a boolean if it we are a view and are cached, but a 

3644 single-dtype meaning that the cacher should be updated following 

3645 setting. 

3646 """ 

3647 if self._is_view and self._is_cached: 

3648 ref = self._get_cacher() 

3649 if ref is not None and ref._is_mixed_type: 

3650 self._check_setitem_copy(stacklevel=4, t="referant", force=True) 

3651 return True 

3652 elif self._is_copy: 

3653 self._check_setitem_copy(stacklevel=4, t="referant") 

3654 return False 

3655 

3656 def _check_setitem_copy(self, stacklevel=4, t="setting", force=False): 

3657 """ 

3658 

3659 Parameters 

3660 ---------- 

3661 stacklevel : int, default 4 

3662 the level to show of the stack when the error is output 

3663 t : str, the type of setting error 

3664 force : bool, default False 

3665 If True, then force showing an error. 

3666 

3667 validate if we are doing a setitem on a chained copy. 

3668 

3669 If you call this function, be sure to set the stacklevel such that the 

3670 user will see the error *at the level of setting* 

3671 

3672 It is technically possible to figure out that we are setting on 

3673 a copy even WITH a multi-dtyped pandas object. In other words, some 

3674 blocks may be views while other are not. Currently _is_view will ALWAYS 

3675 return False for multi-blocks to avoid having to handle this case. 

3676 

3677 df = DataFrame(np.arange(0,9), columns=['count']) 

3678 df['group'] = 'b' 

3679 

3680 # This technically need not raise SettingWithCopy if both are view 

3681 # (which is not # generally guaranteed but is usually True. However, 

3682 # this is in general not a good practice and we recommend using .loc. 

3683 df.iloc[0:5]['group'] = 'a' 

3684 

3685 """ 

3686 

3687 # return early if the check is not needed 

3688 if not (force or self._is_copy): 

3689 return 

3690 

3691 value = config.get_option("mode.chained_assignment") 

3692 if value is None: 

3693 return 

3694 

3695 # see if the copy is not actually referred; if so, then dissolve 

3696 # the copy weakref 

3697 if self._is_copy is not None and not isinstance(self._is_copy, str): 

3698 r = self._is_copy() 

3699 if not gc.get_referents(r) or r.shape == self.shape: 

3700 self._is_copy = None 

3701 return 

3702 

3703 # a custom message 

3704 if isinstance(self._is_copy, str): 

3705 t = self._is_copy 

3706 

3707 elif t == "referant": 

3708 t = ( 

3709 "\n" 

3710 "A value is trying to be set on a copy of a slice from a " 

3711 "DataFrame\n\n" 

3712 "See the caveats in the documentation: " 

3713 "https://pandas.pydata.org/pandas-docs/stable/user_guide/" 

3714 "indexing.html#returning-a-view-versus-a-copy" 

3715 ) 

3716 

3717 else: 

3718 t = ( 

3719 "\n" 

3720 "A value is trying to be set on a copy of a slice from a " 

3721 "DataFrame.\n" 

3722 "Try using .loc[row_indexer,col_indexer] = value " 

3723 "instead\n\nSee the caveats in the documentation: " 

3724 "https://pandas.pydata.org/pandas-docs/stable/user_guide/" 

3725 "indexing.html#returning-a-view-versus-a-copy" 

3726 ) 

3727 

3728 if value == "raise": 

3729 raise com.SettingWithCopyError(t) 

3730 elif value == "warn": 

3731 warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel) 

3732 

3733 def __delitem__(self, key) -> None: 

3734 """ 

3735 Delete item 

3736 """ 

3737 deleted = False 

3738 

3739 maybe_shortcut = False 

3740 if self.ndim == 2 and isinstance(self.columns, MultiIndex): 

3741 try: 

3742 maybe_shortcut = key not in self.columns._engine 

3743 except TypeError: 

3744 pass 

3745 

3746 if maybe_shortcut: 

3747 # Allow shorthand to delete all columns whose first len(key) 

3748 # elements match key: 

3749 if not isinstance(key, tuple): 

3750 key = (key,) 

3751 for col in self.columns: 

3752 if isinstance(col, tuple) and col[: len(key)] == key: 

3753 del self[col] 

3754 deleted = True 

3755 if not deleted: 

3756 # If the above loop ran and didn't delete anything because 

3757 # there was no match, this call should raise the appropriate 

3758 # exception: 

3759 self._data.delete(key) 

3760 

3761 # delete from the caches 

3762 try: 

3763 del self._item_cache[key] 

3764 except KeyError: 

3765 pass 

3766 

3767 # ---------------------------------------------------------------------- 

3768 # Unsorted 

3769 

3770 def get(self, key, default=None): 

3771 """ 

3772 Get item from object for given key (ex: DataFrame column). 

3773 

3774 Returns default value if not found. 

3775 

3776 Parameters 

3777 ---------- 

3778 key : object 

3779 

3780 Returns 

3781 ------- 

3782 value : same type as items contained in object 

3783 """ 

3784 try: 

3785 return self[key] 

3786 except (KeyError, ValueError, IndexError): 

3787 return default 

3788 

3789 @property 

3790 def _is_view(self): 

3791 """Return boolean indicating if self is view of another array """ 

3792 return self._data.is_view 

3793 

3794 def reindex_like( 

3795 self: FrameOrSeries, 

3796 other, 

3797 method: Optional[str] = None, 

3798 copy: bool_t = True, 

3799 limit=None, 

3800 tolerance=None, 

3801 ) -> FrameOrSeries: 

3802 """ 

3803 Return an object with matching indices as other object. 

3804 

3805 Conform the object to the same index on all axes. Optional 

3806 filling logic, placing NaN in locations having no value 

3807 in the previous index. A new object is produced unless the 

3808 new index is equivalent to the current one and copy=False. 

3809 

3810 Parameters 

3811 ---------- 

3812 other : Object of the same data type 

3813 Its row and column indices are used to define the new indices 

3814 of this object. 

3815 method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} 

3816 Method to use for filling holes in reindexed DataFrame. 

3817 Please note: this is only applicable to DataFrames/Series with a 

3818 monotonically increasing/decreasing index. 

3819 

3820 * None (default): don't fill gaps 

3821 * pad / ffill: propagate last valid observation forward to next 

3822 valid 

3823 * backfill / bfill: use next valid observation to fill gap 

3824 * nearest: use nearest valid observations to fill gap. 

3825 

3826 copy : bool, default True 

3827 Return a new object, even if the passed indexes are the same. 

3828 limit : int, default None 

3829 Maximum number of consecutive labels to fill for inexact matches. 

3830 tolerance : optional 

3831 Maximum distance between original and new labels for inexact 

3832 matches. The values of the index at the matching locations most 

3833 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 

3834 

3835 Tolerance may be a scalar value, which applies the same tolerance 

3836 to all values, or list-like, which applies variable tolerance per 

3837 element. List-like includes list, tuple, array, Series, and must be 

3838 the same size as the index and its dtype must exactly match the 

3839 index's type. 

3840 

3841 .. versionadded:: 0.21.0 (list-like tolerance) 

3842 

3843 Returns 

3844 ------- 

3845 Series or DataFrame 

3846 Same type as caller, but with changed indices on each axis. 

3847 

3848 See Also 

3849 -------- 

3850 DataFrame.set_index : Set row labels. 

3851 DataFrame.reset_index : Remove row labels or move them to new columns. 

3852 DataFrame.reindex : Change to new indices or expand indices. 

3853 

3854 Notes 

3855 ----- 

3856 Same as calling 

3857 ``.reindex(index=other.index, columns=other.columns,...)``. 

3858 

3859 Examples 

3860 -------- 

3861 >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'], 

3862 ... [31, 87.8, 'high'], 

3863 ... [22, 71.6, 'medium'], 

3864 ... [35, 95, 'medium']], 

3865 ... columns=['temp_celsius', 'temp_fahrenheit', 

3866 ... 'windspeed'], 

3867 ... index=pd.date_range(start='2014-02-12', 

3868 ... end='2014-02-15', freq='D')) 

3869 

3870 >>> df1 

3871 temp_celsius temp_fahrenheit windspeed 

3872 2014-02-12 24.3 75.7 high 

3873 2014-02-13 31.0 87.8 high 

3874 2014-02-14 22.0 71.6 medium 

3875 2014-02-15 35.0 95.0 medium 

3876 

3877 >>> df2 = pd.DataFrame([[28, 'low'], 

3878 ... [30, 'low'], 

3879 ... [35.1, 'medium']], 

3880 ... columns=['temp_celsius', 'windspeed'], 

3881 ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', 

3882 ... '2014-02-15'])) 

3883 

3884 >>> df2 

3885 temp_celsius windspeed 

3886 2014-02-12 28.0 low 

3887 2014-02-13 30.0 low 

3888 2014-02-15 35.1 medium 

3889 

3890 >>> df2.reindex_like(df1) 

3891 temp_celsius temp_fahrenheit windspeed 

3892 2014-02-12 28.0 NaN low 

3893 2014-02-13 30.0 NaN low 

3894 2014-02-14 NaN NaN NaN 

3895 2014-02-15 35.1 NaN medium 

3896 """ 

3897 d = other._construct_axes_dict( 

3898 axes=self._AXIS_ORDERS, 

3899 method=method, 

3900 copy=copy, 

3901 limit=limit, 

3902 tolerance=tolerance, 

3903 ) 

3904 

3905 return self.reindex(**d) 

3906 

3907 def drop( 

3908 self, 

3909 labels=None, 

3910 axis=0, 

3911 index=None, 

3912 columns=None, 

3913 level=None, 

3914 inplace: bool_t = False, 

3915 errors: str = "raise", 

3916 ): 

3917 

3918 inplace = validate_bool_kwarg(inplace, "inplace") 

3919 

3920 if labels is not None: 

3921 if index is not None or columns is not None: 

3922 raise ValueError("Cannot specify both 'labels' and 'index'/'columns'") 

3923 axis_name = self._get_axis_name(axis) 

3924 axes = {axis_name: labels} 

3925 elif index is not None or columns is not None: 

3926 axes, _ = self._construct_axes_from_arguments((index, columns), {}) 

3927 else: 

3928 raise ValueError( 

3929 "Need to specify at least one of 'labels', 'index' or 'columns'" 

3930 ) 

3931 

3932 obj = self 

3933 

3934 for axis, labels in axes.items(): 

3935 if labels is not None: 

3936 obj = obj._drop_axis(labels, axis, level=level, errors=errors) 

3937 

3938 if inplace: 

3939 self._update_inplace(obj) 

3940 else: 

3941 return obj 

3942 

3943 def _drop_axis( 

3944 self: FrameOrSeries, labels, axis, level=None, errors: str = "raise" 

3945 ) -> FrameOrSeries: 

3946 """ 

3947 Drop labels from specified axis. Used in the ``drop`` method 

3948 internally. 

3949 

3950 Parameters 

3951 ---------- 

3952 labels : single label or list-like 

3953 axis : int or axis name 

3954 level : int or level name, default None 

3955 For MultiIndex 

3956 errors : {'ignore', 'raise'}, default 'raise' 

3957 If 'ignore', suppress error and existing labels are dropped. 

3958 

3959 """ 

3960 axis = self._get_axis_number(axis) 

3961 axis_name = self._get_axis_name(axis) 

3962 axis = self._get_axis(axis) 

3963 

3964 if axis.is_unique: 

3965 if level is not None: 

3966 if not isinstance(axis, MultiIndex): 

3967 raise AssertionError("axis must be a MultiIndex") 

3968 new_axis = axis.drop(labels, level=level, errors=errors) 

3969 else: 

3970 new_axis = axis.drop(labels, errors=errors) 

3971 result = self.reindex(**{axis_name: new_axis}) 

3972 

3973 # Case for non-unique axis 

3974 else: 

3975 labels = ensure_object(com.index_labels_to_array(labels)) 

3976 if level is not None: 

3977 if not isinstance(axis, MultiIndex): 

3978 raise AssertionError("axis must be a MultiIndex") 

3979 indexer = ~axis.get_level_values(level).isin(labels) 

3980 

3981 # GH 18561 MultiIndex.drop should raise if label is absent 

3982 if errors == "raise" and indexer.all(): 

3983 raise KeyError(f"{labels} not found in axis") 

3984 else: 

3985 indexer = ~axis.isin(labels) 

3986 # Check if label doesn't exist along axis 

3987 labels_missing = (axis.get_indexer_for(labels) == -1).any() 

3988 if errors == "raise" and labels_missing: 

3989 raise KeyError(f"{labels} not found in axis") 

3990 

3991 slicer = [slice(None)] * self.ndim 

3992 slicer[self._get_axis_number(axis_name)] = indexer 

3993 

3994 result = self.loc[tuple(slicer)] 

3995 

3996 return result 

3997 

3998 def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: 

3999 """ 

4000 Replace self internals with result. 

4001 

4002 Parameters 

4003 ---------- 

4004 verify_is_copy : bool, default True 

4005 Provide is_copy checks. 

4006 """ 

4007 # NOTE: This does *not* call __finalize__ and that's an explicit 

4008 # decision that we may revisit in the future. 

4009 

4010 self._reset_cache() 

4011 self._clear_item_cache() 

4012 self._data = getattr(result, "_data", result) 

4013 self._maybe_update_cacher(verify_is_copy=verify_is_copy) 

4014 

4015 def add_prefix(self: FrameOrSeries, prefix: str) -> FrameOrSeries: 

4016 """ 

4017 Prefix labels with string `prefix`. 

4018 

4019 For Series, the row labels are prefixed. 

4020 For DataFrame, the column labels are prefixed. 

4021 

4022 Parameters 

4023 ---------- 

4024 prefix : str 

4025 The string to add before each label. 

4026 

4027 Returns 

4028 ------- 

4029 Series or DataFrame 

4030 New Series or DataFrame with updated labels. 

4031 

4032 See Also 

4033 -------- 

4034 Series.add_suffix: Suffix row labels with string `suffix`. 

4035 DataFrame.add_suffix: Suffix column labels with string `suffix`. 

4036 

4037 Examples 

4038 -------- 

4039 >>> s = pd.Series([1, 2, 3, 4]) 

4040 >>> s 

4041 0 1 

4042 1 2 

4043 2 3 

4044 3 4 

4045 dtype: int64 

4046 

4047 >>> s.add_prefix('item_') 

4048 item_0 1 

4049 item_1 2 

4050 item_2 3 

4051 item_3 4 

4052 dtype: int64 

4053 

4054 >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) 

4055 >>> df 

4056 A B 

4057 0 1 3 

4058 1 2 4 

4059 2 3 5 

4060 3 4 6 

4061 

4062 >>> df.add_prefix('col_') 

4063 col_A col_B 

4064 0 1 3 

4065 1 2 4 

4066 2 3 5 

4067 3 4 6 

4068 """ 

4069 f = functools.partial("{prefix}{}".format, prefix=prefix) 

4070 

4071 mapper = {self._info_axis_name: f} 

4072 return self.rename(**mapper) # type: ignore 

4073 

4074 def add_suffix(self: FrameOrSeries, suffix: str) -> FrameOrSeries: 

4075 """ 

4076 Suffix labels with string `suffix`. 

4077 

4078 For Series, the row labels are suffixed. 

4079 For DataFrame, the column labels are suffixed. 

4080 

4081 Parameters 

4082 ---------- 

4083 suffix : str 

4084 The string to add after each label. 

4085 

4086 Returns 

4087 ------- 

4088 Series or DataFrame 

4089 New Series or DataFrame with updated labels. 

4090 

4091 See Also 

4092 -------- 

4093 Series.add_prefix: Prefix row labels with string `prefix`. 

4094 DataFrame.add_prefix: Prefix column labels with string `prefix`. 

4095 

4096 Examples 

4097 -------- 

4098 >>> s = pd.Series([1, 2, 3, 4]) 

4099 >>> s 

4100 0 1 

4101 1 2 

4102 2 3 

4103 3 4 

4104 dtype: int64 

4105 

4106 >>> s.add_suffix('_item') 

4107 0_item 1 

4108 1_item 2 

4109 2_item 3 

4110 3_item 4 

4111 dtype: int64 

4112 

4113 >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) 

4114 >>> df 

4115 A B 

4116 0 1 3 

4117 1 2 4 

4118 2 3 5 

4119 3 4 6 

4120 

4121 >>> df.add_suffix('_col') 

4122 A_col B_col 

4123 0 1 3 

4124 1 2 4 

4125 2 3 5 

4126 3 4 6 

4127 """ 

4128 f = functools.partial("{}{suffix}".format, suffix=suffix) 

4129 

4130 mapper = {self._info_axis_name: f} 

4131 return self.rename(**mapper) # type: ignore 

4132 

4133 def sort_values( 

4134 self, 

4135 by=None, 

4136 axis=0, 

4137 ascending=True, 

4138 inplace: bool_t = False, 

4139 kind: str = "quicksort", 

4140 na_position: str = "last", 

4141 ignore_index: bool_t = False, 

4142 ): 

4143 """ 

4144 Sort by the values along either axis. 

4145 

4146 Parameters 

4147 ----------%(optional_by)s 

4148 axis : %(axes_single_arg)s, default 0 

4149 Axis to be sorted. 

4150 ascending : bool or list of bool, default True 

4151 Sort ascending vs. descending. Specify list for multiple sort 

4152 orders. If this is a list of bools, must match the length of 

4153 the by. 

4154 inplace : bool, default False 

4155 If True, perform operation in-place. 

4156 kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' 

4157 Choice of sorting algorithm. See also ndarray.np.sort for more 

4158 information. `mergesort` is the only stable algorithm. For 

4159 DataFrames, this option is only applied when sorting on a single 

4160 column or label. 

4161 na_position : {'first', 'last'}, default 'last' 

4162 Puts NaNs at the beginning if `first`; `last` puts NaNs at the 

4163 end. 

4164 ignore_index : bool, default False 

4165 If True, the resulting axis will be labeled 0, 1, …, n - 1. 

4166 

4167 .. versionadded:: 1.0.0 

4168 

4169 Returns 

4170 ------- 

4171 sorted_obj : DataFrame or None 

4172 DataFrame with sorted values if inplace=False, None otherwise. 

4173 

4174 Examples 

4175 -------- 

4176 >>> df = pd.DataFrame({ 

4177 ... 'col1': ['A', 'A', 'B', np.nan, 'D', 'C'], 

4178 ... 'col2': [2, 1, 9, 8, 7, 4], 

4179 ... 'col3': [0, 1, 9, 4, 2, 3], 

4180 ... }) 

4181 >>> df 

4182 col1 col2 col3 

4183 0 A 2 0 

4184 1 A 1 1 

4185 2 B 9 9 

4186 3 NaN 8 4 

4187 4 D 7 2 

4188 5 C 4 3 

4189 

4190 Sort by col1 

4191 

4192 >>> df.sort_values(by=['col1']) 

4193 col1 col2 col3 

4194 0 A 2 0 

4195 1 A 1 1 

4196 2 B 9 9 

4197 5 C 4 3 

4198 4 D 7 2 

4199 3 NaN 8 4 

4200 

4201 Sort by multiple columns 

4202 

4203 >>> df.sort_values(by=['col1', 'col2']) 

4204 col1 col2 col3 

4205 1 A 1 1 

4206 0 A 2 0 

4207 2 B 9 9 

4208 5 C 4 3 

4209 4 D 7 2 

4210 3 NaN 8 4 

4211 

4212 Sort Descending 

4213 

4214 >>> df.sort_values(by='col1', ascending=False) 

4215 col1 col2 col3 

4216 4 D 7 2 

4217 5 C 4 3 

4218 2 B 9 9 

4219 0 A 2 0 

4220 1 A 1 1 

4221 3 NaN 8 4 

4222 

4223 Putting NAs first 

4224 

4225 >>> df.sort_values(by='col1', ascending=False, na_position='first') 

4226 col1 col2 col3 

4227 3 NaN 8 4 

4228 4 D 7 2 

4229 5 C 4 3 

4230 2 B 9 9 

4231 0 A 2 0 

4232 1 A 1 1 

4233 """ 

4234 raise AbstractMethodError(self) 

4235 

4236 def sort_index( 

4237 self, 

4238 axis=0, 

4239 level=None, 

4240 ascending: bool_t = True, 

4241 inplace: bool_t = False, 

4242 kind: str = "quicksort", 

4243 na_position: str = "last", 

4244 sort_remaining: bool_t = True, 

4245 ignore_index: bool_t = False, 

4246 ): 

4247 """ 

4248 Sort object by labels (along an axis). 

4249 

4250 Parameters 

4251 ---------- 

4252 axis : {0 or 'index', 1 or 'columns'}, default 0 

4253 The axis along which to sort. The value 0 identifies the rows, 

4254 and 1 identifies the columns. 

4255 level : int or level name or list of ints or list of level names 

4256 If not None, sort on values in specified index level(s). 

4257 ascending : bool, default True 

4258 Sort ascending vs. descending. 

4259 inplace : bool, default False 

4260 If True, perform operation in-place. 

4261 kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' 

4262 Choice of sorting algorithm. See also ndarray.np.sort for more 

4263 information. `mergesort` is the only stable algorithm. For 

4264 DataFrames, this option is only applied when sorting on a single 

4265 column or label. 

4266 na_position : {'first', 'last'}, default 'last' 

4267 Puts NaNs at the beginning if `first`; `last` puts NaNs at the end. 

4268 Not implemented for MultiIndex. 

4269 sort_remaining : bool, default True 

4270 If True and sorting by level and index is multilevel, sort by other 

4271 levels too (in order) after sorting by specified level. 

4272 ignore_index : bool, default False 

4273 If True, the resulting axis will be labeled 0, 1, …, n - 1. 

4274 

4275 .. versionadded:: 1.0.0 

4276 

4277 Returns 

4278 ------- 

4279 sorted_obj : DataFrame or None 

4280 DataFrame with sorted index if inplace=False, None otherwise. 

4281 """ 

4282 inplace = validate_bool_kwarg(inplace, "inplace") 

4283 axis = self._get_axis_number(axis) 

4284 axis_name = self._get_axis_name(axis) 

4285 labels = self._get_axis(axis) 

4286 

4287 if level is not None: 

4288 raise NotImplementedError("level is not implemented") 

4289 if inplace: 

4290 raise NotImplementedError("inplace is not implemented") 

4291 

4292 sort_index = labels.argsort() 

4293 if not ascending: 

4294 sort_index = sort_index[::-1] 

4295 

4296 new_axis = labels.take(sort_index) 

4297 return self.reindex(**{axis_name: new_axis}) 

4298 

4299 def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: 

4300 """ 

4301 Conform %(klass)s to new index with optional filling logic. 

4302 

4303 Places NA/NaN in locations having no value in the previous index. A new object 

4304 is produced unless the new index is equivalent to the current one and 

4305 ``copy=False``. 

4306 

4307 Parameters 

4308 ---------- 

4309 %(optional_labels)s 

4310 %(axes)s : array-like, optional 

4311 New labels / index to conform to, should be specified using 

4312 keywords. Preferably an Index object to avoid duplicating data. 

4313 %(optional_axis)s 

4314 method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} 

4315 Method to use for filling holes in reindexed DataFrame. 

4316 Please note: this is only applicable to DataFrames/Series with a 

4317 monotonically increasing/decreasing index. 

4318 

4319 * None (default): don't fill gaps 

4320 * pad / ffill: Propagate last valid observation forward to next 

4321 valid. 

4322 * backfill / bfill: Use next valid observation to fill gap. 

4323 * nearest: Use nearest valid observations to fill gap. 

4324 

4325 copy : bool, default True 

4326 Return a new object, even if the passed indexes are the same. 

4327 level : int or name 

4328 Broadcast across a level, matching Index values on the 

4329 passed MultiIndex level. 

4330 fill_value : scalar, default np.NaN 

4331 Value to use for missing values. Defaults to NaN, but can be any 

4332 "compatible" value. 

4333 limit : int, default None 

4334 Maximum number of consecutive elements to forward or backward fill. 

4335 tolerance : optional 

4336 Maximum distance between original and new labels for inexact 

4337 matches. The values of the index at the matching locations most 

4338 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 

4339 

4340 Tolerance may be a scalar value, which applies the same tolerance 

4341 to all values, or list-like, which applies variable tolerance per 

4342 element. List-like includes list, tuple, array, Series, and must be 

4343 the same size as the index and its dtype must exactly match the 

4344 index's type. 

4345 

4346 .. versionadded:: 0.21.0 (list-like tolerance) 

4347 

4348 Returns 

4349 ------- 

4350 %(klass)s with changed index. 

4351 

4352 See Also 

4353 -------- 

4354 DataFrame.set_index : Set row labels. 

4355 DataFrame.reset_index : Remove row labels or move them to new columns. 

4356 DataFrame.reindex_like : Change to same indices as other DataFrame. 

4357 

4358 Examples 

4359 -------- 

4360 

4361 ``DataFrame.reindex`` supports two calling conventions 

4362 

4363 * ``(index=index_labels, columns=column_labels, ...)`` 

4364 * ``(labels, axis={'index', 'columns'}, ...)`` 

4365 

4366 We *highly* recommend using keyword arguments to clarify your 

4367 intent. 

4368 

4369 Create a dataframe with some fictional data. 

4370 

4371 >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror'] 

4372 >>> df = pd.DataFrame({'http_status': [200, 200, 404, 404, 301], 

4373 ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}, 

4374 ... index=index) 

4375 >>> df 

4376 http_status response_time 

4377 Firefox 200 0.04 

4378 Chrome 200 0.02 

4379 Safari 404 0.07 

4380 IE10 404 0.08 

4381 Konqueror 301 1.00 

4382 

4383 Create a new index and reindex the dataframe. By default 

4384 values in the new index that do not have corresponding 

4385 records in the dataframe are assigned ``NaN``. 

4386 

4387 >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', 

4388 ... 'Chrome'] 

4389 >>> df.reindex(new_index) 

4390 http_status response_time 

4391 Safari 404.0 0.07 

4392 Iceweasel NaN NaN 

4393 Comodo Dragon NaN NaN 

4394 IE10 404.0 0.08 

4395 Chrome 200.0 0.02 

4396 

4397 We can fill in the missing values by passing a value to 

4398 the keyword ``fill_value``. Because the index is not monotonically 

4399 increasing or decreasing, we cannot use arguments to the keyword 

4400 ``method`` to fill the ``NaN`` values. 

4401 

4402 >>> df.reindex(new_index, fill_value=0) 

4403 http_status response_time 

4404 Safari 404 0.07 

4405 Iceweasel 0 0.00 

4406 Comodo Dragon 0 0.00 

4407 IE10 404 0.08 

4408 Chrome 200 0.02 

4409 

4410 >>> df.reindex(new_index, fill_value='missing') 

4411 http_status response_time 

4412 Safari 404 0.07 

4413 Iceweasel missing missing 

4414 Comodo Dragon missing missing 

4415 IE10 404 0.08 

4416 Chrome 200 0.02 

4417 

4418 We can also reindex the columns. 

4419 

4420 >>> df.reindex(columns=['http_status', 'user_agent']) 

4421 http_status user_agent 

4422 Firefox 200 NaN 

4423 Chrome 200 NaN 

4424 Safari 404 NaN 

4425 IE10 404 NaN 

4426 Konqueror 301 NaN 

4427 

4428 Or we can use "axis-style" keyword arguments 

4429 

4430 >>> df.reindex(['http_status', 'user_agent'], axis="columns") 

4431 http_status user_agent 

4432 Firefox 200 NaN 

4433 Chrome 200 NaN 

4434 Safari 404 NaN 

4435 IE10 404 NaN 

4436 Konqueror 301 NaN 

4437 

4438 To further illustrate the filling functionality in 

4439 ``reindex``, we will create a dataframe with a 

4440 monotonically increasing index (for example, a sequence 

4441 of dates). 

4442 

4443 >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D') 

4444 >>> df2 = pd.DataFrame({"prices": [100, 101, np.nan, 100, 89, 88]}, 

4445 ... index=date_index) 

4446 >>> df2 

4447 prices 

4448 2010-01-01 100.0 

4449 2010-01-02 101.0 

4450 2010-01-03 NaN 

4451 2010-01-04 100.0 

4452 2010-01-05 89.0 

4453 2010-01-06 88.0 

4454 

4455 Suppose we decide to expand the dataframe to cover a wider 

4456 date range. 

4457 

4458 >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D') 

4459 >>> df2.reindex(date_index2) 

4460 prices 

4461 2009-12-29 NaN 

4462 2009-12-30 NaN 

4463 2009-12-31 NaN 

4464 2010-01-01 100.0 

4465 2010-01-02 101.0 

4466 2010-01-03 NaN 

4467 2010-01-04 100.0 

4468 2010-01-05 89.0 

4469 2010-01-06 88.0 

4470 2010-01-07 NaN 

4471 

4472 The index entries that did not have a value in the original data frame 

4473 (for example, '2009-12-29') are by default filled with ``NaN``. 

4474 If desired, we can fill in the missing values using one of several 

4475 options. 

4476 

4477 For example, to back-propagate the last valid value to fill the ``NaN`` 

4478 values, pass ``bfill`` as an argument to the ``method`` keyword. 

4479 

4480 >>> df2.reindex(date_index2, method='bfill') 

4481 prices 

4482 2009-12-29 100.0 

4483 2009-12-30 100.0 

4484 2009-12-31 100.0 

4485 2010-01-01 100.0 

4486 2010-01-02 101.0 

4487 2010-01-03 NaN 

4488 2010-01-04 100.0 

4489 2010-01-05 89.0 

4490 2010-01-06 88.0 

4491 2010-01-07 NaN 

4492 

4493 Please note that the ``NaN`` value present in the original dataframe 

4494 (at index value 2010-01-03) will not be filled by any of the 

4495 value propagation schemes. This is because filling while reindexing 

4496 does not look at dataframe values, but only compares the original and 

4497 desired indexes. If you do want to fill in the ``NaN`` values present 

4498 in the original dataframe, use the ``fillna()`` method. 

4499 

4500 See the :ref:`user guide <basics.reindexing>` for more. 

4501 """ 

4502 # TODO: Decide if we care about having different examples for different 

4503 # kinds 

4504 

4505 # construct the args 

4506 axes, kwargs = self._construct_axes_from_arguments(args, kwargs) 

4507 method = missing.clean_reindex_fill_method(kwargs.pop("method", None)) 

4508 level = kwargs.pop("level", None) 

4509 copy = kwargs.pop("copy", True) 

4510 limit = kwargs.pop("limit", None) 

4511 tolerance = kwargs.pop("tolerance", None) 

4512 fill_value = kwargs.pop("fill_value", None) 

4513 

4514 # Series.reindex doesn't use / need the axis kwarg 

4515 # We pop and ignore it here, to make writing Series/Frame generic code 

4516 # easier 

4517 kwargs.pop("axis", None) 

4518 

4519 if kwargs: 

4520 raise TypeError( 

4521 "reindex() got an unexpected keyword " 

4522 f'argument "{list(kwargs.keys())[0]}"' 

4523 ) 

4524 

4525 self._consolidate_inplace() 

4526 

4527 # if all axes that are requested to reindex are equal, then only copy 

4528 # if indicated must have index names equal here as well as values 

4529 if all( 

4530 self._get_axis(axis).identical(ax) 

4531 for axis, ax in axes.items() 

4532 if ax is not None 

4533 ): 

4534 if copy: 

4535 return self.copy() 

4536 return self 

4537 

4538 # check if we are a multi reindex 

4539 if self._needs_reindex_multi(axes, method, level): 

4540 return self._reindex_multi(axes, copy, fill_value) 

4541 

4542 # perform the reindex on the axes 

4543 return self._reindex_axes( 

4544 axes, level, limit, tolerance, method, fill_value, copy 

4545 ).__finalize__(self) 

4546 

4547 def _reindex_axes( 

4548 self: FrameOrSeries, axes, level, limit, tolerance, method, fill_value, copy 

4549 ) -> FrameOrSeries: 

4550 """Perform the reindex for all the axes.""" 

4551 obj = self 

4552 for a in self._AXIS_ORDERS: 

4553 labels = axes[a] 

4554 if labels is None: 

4555 continue 

4556 

4557 ax = self._get_axis(a) 

4558 new_index, indexer = ax.reindex( 

4559 labels, level=level, limit=limit, tolerance=tolerance, method=method 

4560 ) 

4561 

4562 axis = self._get_axis_number(a) 

4563 obj = obj._reindex_with_indexers( 

4564 {axis: [new_index, indexer]}, 

4565 fill_value=fill_value, 

4566 copy=copy, 

4567 allow_dups=False, 

4568 ) 

4569 

4570 return obj 

4571 

4572 def _needs_reindex_multi(self, axes, method, level) -> bool_t: 

4573 """Check if we do need a multi reindex.""" 

4574 return ( 

4575 (com.count_not_none(*axes.values()) == self._AXIS_LEN) 

4576 and method is None 

4577 and level is None 

4578 and not self._is_mixed_type 

4579 ) 

4580 

4581 def _reindex_multi(self, axes, copy, fill_value): 

4582 raise AbstractMethodError(self) 

4583 

4584 def _reindex_with_indexers( 

4585 self: FrameOrSeries, 

4586 reindexers, 

4587 fill_value=None, 

4588 copy: bool_t = False, 

4589 allow_dups: bool_t = False, 

4590 ) -> FrameOrSeries: 

4591 """allow_dups indicates an internal call here """ 

4592 

4593 # reindex doing multiple operations on different axes if indicated 

4594 new_data = self._data 

4595 for axis in sorted(reindexers.keys()): 

4596 index, indexer = reindexers[axis] 

4597 baxis = self._get_block_manager_axis(axis) 

4598 

4599 if index is None: 

4600 continue 

4601 

4602 index = ensure_index(index) 

4603 if indexer is not None: 

4604 indexer = ensure_int64(indexer) 

4605 

4606 # TODO: speed up on homogeneous DataFrame objects 

4607 new_data = new_data.reindex_indexer( 

4608 index, 

4609 indexer, 

4610 axis=baxis, 

4611 fill_value=fill_value, 

4612 allow_dups=allow_dups, 

4613 copy=copy, 

4614 ) 

4615 

4616 if copy and new_data is self._data: 

4617 new_data = new_data.copy() 

4618 

4619 return self._constructor(new_data).__finalize__(self) 

4620 

4621 def filter( 

4622 self: FrameOrSeries, 

4623 items=None, 

4624 like: Optional[str] = None, 

4625 regex: Optional[str] = None, 

4626 axis=None, 

4627 ) -> FrameOrSeries: 

4628 """ 

4629 Subset the dataframe rows or columns according to the specified index labels. 

4630 

4631 Note that this routine does not filter a dataframe on its 

4632 contents. The filter is applied to the labels of the index. 

4633 

4634 Parameters 

4635 ---------- 

4636 items : list-like 

4637 Keep labels from axis which are in items. 

4638 like : str 

4639 Keep labels from axis for which "like in label == True". 

4640 regex : str (regular expression) 

4641 Keep labels from axis for which re.search(regex, label) == True. 

4642 axis : {0 or ‘index’, 1 or ‘columns’, None}, default None 

4643 The axis to filter on, expressed either as an index (int) 

4644 or axis name (str). By default this is the info axis, 

4645 'index' for Series, 'columns' for DataFrame. 

4646 

4647 Returns 

4648 ------- 

4649 same type as input object 

4650 

4651 See Also 

4652 -------- 

4653 DataFrame.loc 

4654 

4655 Notes 

4656 ----- 

4657 The ``items``, ``like``, and ``regex`` parameters are 

4658 enforced to be mutually exclusive. 

4659 

4660 ``axis`` defaults to the info axis that is used when indexing 

4661 with ``[]``. 

4662 

4663 Examples 

4664 -------- 

4665 >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])), 

4666 ... index=['mouse', 'rabbit'], 

4667 ... columns=['one', 'two', 'three']) 

4668 

4669 >>> # select columns by name 

4670 >>> df.filter(items=['one', 'three']) 

4671 one three 

4672 mouse 1 3 

4673 rabbit 4 6 

4674 

4675 >>> # select columns by regular expression 

4676 >>> df.filter(regex='e$', axis=1) 

4677 one three 

4678 mouse 1 3 

4679 rabbit 4 6 

4680 

4681 >>> # select rows containing 'bbi' 

4682 >>> df.filter(like='bbi', axis=0) 

4683 one two three 

4684 rabbit 4 5 6 

4685 """ 

4686 nkw = com.count_not_none(items, like, regex) 

4687 if nkw > 1: 

4688 raise TypeError( 

4689 "Keyword arguments `items`, `like`, or `regex` " 

4690 "are mutually exclusive" 

4691 ) 

4692 

4693 if axis is None: 

4694 axis = self._info_axis_name 

4695 labels = self._get_axis(axis) 

4696 

4697 if items is not None: 

4698 name = self._get_axis_name(axis) 

4699 return self.reindex(**{name: [r for r in items if r in labels]}) 

4700 elif like: 

4701 

4702 def f(x): 

4703 return like in ensure_str(x) 

4704 

4705 values = labels.map(f) 

4706 return self.loc(axis=axis)[values] 

4707 elif regex: 

4708 

4709 def f(x): 

4710 return matcher.search(ensure_str(x)) is not None 

4711 

4712 matcher = re.compile(regex) 

4713 values = labels.map(f) 

4714 return self.loc(axis=axis)[values] 

4715 else: 

4716 raise TypeError("Must pass either `items`, `like`, or `regex`") 

4717 

4718 def head(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: 

4719 """ 

4720 Return the first `n` rows. 

4721 

4722 This function returns the first `n` rows for the object based 

4723 on position. It is useful for quickly testing if your object 

4724 has the right type of data in it. 

4725 

4726 For negative values of `n`, this function returns all rows except 

4727 the last `n` rows, equivalent to ``df[:-n]``. 

4728 

4729 Parameters 

4730 ---------- 

4731 n : int, default 5 

4732 Number of rows to select. 

4733 

4734 Returns 

4735 ------- 

4736 same type as caller 

4737 The first `n` rows of the caller object. 

4738 

4739 See Also 

4740 -------- 

4741 DataFrame.tail: Returns the last `n` rows. 

4742 

4743 Examples 

4744 -------- 

4745 >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', 

4746 ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) 

4747 >>> df 

4748 animal 

4749 0 alligator 

4750 1 bee 

4751 2 falcon 

4752 3 lion 

4753 4 monkey 

4754 5 parrot 

4755 6 shark 

4756 7 whale 

4757 8 zebra 

4758 

4759 Viewing the first 5 lines 

4760 

4761 >>> df.head() 

4762 animal 

4763 0 alligator 

4764 1 bee 

4765 2 falcon 

4766 3 lion 

4767 4 monkey 

4768 

4769 Viewing the first `n` lines (three in this case) 

4770 

4771 >>> df.head(3) 

4772 animal 

4773 0 alligator 

4774 1 bee 

4775 2 falcon 

4776 

4777 For negative values of `n` 

4778 

4779 >>> df.head(-3) 

4780 animal 

4781 0 alligator 

4782 1 bee 

4783 2 falcon 

4784 3 lion 

4785 4 monkey 

4786 5 parrot 

4787 """ 

4788 

4789 return self.iloc[:n] 

4790 

4791 def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: 

4792 """ 

4793 Return the last `n` rows. 

4794 

4795 This function returns last `n` rows from the object based on 

4796 position. It is useful for quickly verifying data, for example, 

4797 after sorting or appending rows. 

4798 

4799 For negative values of `n`, this function returns all rows except 

4800 the first `n` rows, equivalent to ``df[n:]``. 

4801 

4802 Parameters 

4803 ---------- 

4804 n : int, default 5 

4805 Number of rows to select. 

4806 

4807 Returns 

4808 ------- 

4809 type of caller 

4810 The last `n` rows of the caller object. 

4811 

4812 See Also 

4813 -------- 

4814 DataFrame.head : The first `n` rows of the caller object. 

4815 

4816 Examples 

4817 -------- 

4818 >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', 

4819 ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) 

4820 >>> df 

4821 animal 

4822 0 alligator 

4823 1 bee 

4824 2 falcon 

4825 3 lion 

4826 4 monkey 

4827 5 parrot 

4828 6 shark 

4829 7 whale 

4830 8 zebra 

4831 

4832 Viewing the last 5 lines 

4833 

4834 >>> df.tail() 

4835 animal 

4836 4 monkey 

4837 5 parrot 

4838 6 shark 

4839 7 whale 

4840 8 zebra 

4841 

4842 Viewing the last `n` lines (three in this case) 

4843 

4844 >>> df.tail(3) 

4845 animal 

4846 6 shark 

4847 7 whale 

4848 8 zebra 

4849 

4850 For negative values of `n` 

4851 

4852 >>> df.tail(-3) 

4853 animal 

4854 3 lion 

4855 4 monkey 

4856 5 parrot 

4857 6 shark 

4858 7 whale 

4859 8 zebra 

4860 """ 

4861 

4862 if n == 0: 

4863 return self.iloc[0:0] 

4864 return self.iloc[-n:] 

4865 

4866 def sample( 

4867 self: FrameOrSeries, 

4868 n=None, 

4869 frac=None, 

4870 replace=False, 

4871 weights=None, 

4872 random_state=None, 

4873 axis=None, 

4874 ) -> FrameOrSeries: 

4875 """ 

4876 Return a random sample of items from an axis of object. 

4877 

4878 You can use `random_state` for reproducibility. 

4879 

4880 Parameters 

4881 ---------- 

4882 n : int, optional 

4883 Number of items from axis to return. Cannot be used with `frac`. 

4884 Default = 1 if `frac` = None. 

4885 frac : float, optional 

4886 Fraction of axis items to return. Cannot be used with `n`. 

4887 replace : bool, default False 

4888 Allow or disallow sampling of the same row more than once. 

4889 weights : str or ndarray-like, optional 

4890 Default 'None' results in equal probability weighting. 

4891 If passed a Series, will align with target object on index. Index 

4892 values in weights not found in sampled object will be ignored and 

4893 index values in sampled object not in weights will be assigned 

4894 weights of zero. 

4895 If called on a DataFrame, will accept the name of a column 

4896 when axis = 0. 

4897 Unless weights are a Series, weights must be same length as axis 

4898 being sampled. 

4899 If weights do not sum to 1, they will be normalized to sum to 1. 

4900 Missing values in the weights column will be treated as zero. 

4901 Infinite values not allowed. 

4902 random_state : int or numpy.random.RandomState, optional 

4903 Seed for the random number generator (if int), or numpy RandomState 

4904 object. 

4905 axis : {0 or ‘index’, 1 or ‘columns’, None}, default None 

4906 Axis to sample. Accepts axis number or name. Default is stat axis 

4907 for given data type (0 for Series and DataFrames). 

4908 

4909 Returns 

4910 ------- 

4911 Series or DataFrame 

4912 A new object of same type as caller containing `n` items randomly 

4913 sampled from the caller object. 

4914 

4915 See Also 

4916 -------- 

4917 numpy.random.choice: Generates a random sample from a given 1-D numpy 

4918 array. 

4919 

4920 Notes 

4921 ----- 

4922 If `frac` > 1, `replacement` should be set to `True`. 

4923 

4924 Examples 

4925 -------- 

4926 >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0], 

4927 ... 'num_wings': [2, 0, 0, 0], 

4928 ... 'num_specimen_seen': [10, 2, 1, 8]}, 

4929 ... index=['falcon', 'dog', 'spider', 'fish']) 

4930 >>> df 

4931 num_legs num_wings num_specimen_seen 

4932 falcon 2 2 10 

4933 dog 4 0 2 

4934 spider 8 0 1 

4935 fish 0 0 8 

4936 

4937 Extract 3 random elements from the ``Series`` ``df['num_legs']``: 

4938 Note that we use `random_state` to ensure the reproducibility of 

4939 the examples. 

4940 

4941 >>> df['num_legs'].sample(n=3, random_state=1) 

4942 fish 0 

4943 spider 8 

4944 falcon 2 

4945 Name: num_legs, dtype: int64 

4946 

4947 A random 50% sample of the ``DataFrame`` with replacement: 

4948 

4949 >>> df.sample(frac=0.5, replace=True, random_state=1) 

4950 num_legs num_wings num_specimen_seen 

4951 dog 4 0 2 

4952 fish 0 0 8 

4953 

4954 An upsample sample of the ``DataFrame`` with replacement: 

4955 Note that `replace` parameter has to be `True` for `frac` parameter > 1. 

4956 

4957 >>> df.sample(frac=2, replace=True, random_state=1) 

4958 num_legs num_wings num_specimen_seen 

4959 dog 4 0 2 

4960 fish 0 0 8 

4961 falcon 2 2 10 

4962 falcon 2 2 10 

4963 fish 0 0 8 

4964 dog 4 0 2 

4965 fish 0 0 8 

4966 dog 4 0 2 

4967 

4968 Using a DataFrame column as weights. Rows with larger value in the 

4969 `num_specimen_seen` column are more likely to be sampled. 

4970 

4971 >>> df.sample(n=2, weights='num_specimen_seen', random_state=1) 

4972 num_legs num_wings num_specimen_seen 

4973 falcon 2 2 10 

4974 fish 0 0 8 

4975 """ 

4976 

4977 if axis is None: 

4978 axis = self._stat_axis_number 

4979 

4980 axis = self._get_axis_number(axis) 

4981 axis_length = self.shape[axis] 

4982 

4983 # Process random_state argument 

4984 rs = com.random_state(random_state) 

4985 

4986 # Check weights for compliance 

4987 if weights is not None: 

4988 

4989 # If a series, align with frame 

4990 if isinstance(weights, ABCSeries): 

4991 weights = weights.reindex(self.axes[axis]) 

4992 

4993 # Strings acceptable if a dataframe and axis = 0 

4994 if isinstance(weights, str): 

4995 if isinstance(self, ABCDataFrame): 

4996 if axis == 0: 

4997 try: 

4998 weights = self[weights] 

4999 except KeyError: 

5000 raise KeyError( 

5001 "String passed to weights not a valid column" 

5002 ) 

5003 else: 

5004 raise ValueError( 

5005 "Strings can only be passed to " 

5006 "weights when sampling from rows on " 

5007 "a DataFrame" 

5008 ) 

5009 else: 

5010 raise ValueError( 

5011 "Strings cannot be passed as weights " 

5012 "when sampling from a Series." 

5013 ) 

5014 

5015 weights = pd.Series(weights, dtype="float64") 

5016 

5017 if len(weights) != axis_length: 

5018 raise ValueError( 

5019 "Weights and axis to be sampled must be of same length" 

5020 ) 

5021 

5022 if (weights == np.inf).any() or (weights == -np.inf).any(): 

5023 raise ValueError("weight vector may not include `inf` values") 

5024 

5025 if (weights < 0).any(): 

5026 raise ValueError("weight vector many not include negative values") 

5027 

5028 # If has nan, set to zero. 

5029 weights = weights.fillna(0) 

5030 

5031 # Renormalize if don't sum to 1 

5032 if weights.sum() != 1: 

5033 if weights.sum() != 0: 

5034 weights = weights / weights.sum() 

5035 else: 

5036 raise ValueError("Invalid weights: weights sum to zero") 

5037 

5038 weights = weights.values 

5039 

5040 # If no frac or n, default to n=1. 

5041 if n is None and frac is None: 

5042 n = 1 

5043 elif frac is not None and frac > 1 and not replace: 

5044 raise ValueError( 

5045 "Replace has to be set to `True` when " 

5046 "upsampling the population `frac` > 1." 

5047 ) 

5048 elif n is not None and frac is None and n % 1 != 0: 

5049 raise ValueError("Only integers accepted as `n` values") 

5050 elif n is None and frac is not None: 

5051 n = int(round(frac * axis_length)) 

5052 elif n is not None and frac is not None: 

5053 raise ValueError("Please enter a value for `frac` OR `n`, not both") 

5054 

5055 # Check for negative sizes 

5056 if n < 0: 

5057 raise ValueError( 

5058 "A negative number of rows requested. Please provide positive value." 

5059 ) 

5060 

5061 locs = rs.choice(axis_length, size=n, replace=replace, p=weights) 

5062 return self.take(locs, axis=axis) 

5063 

5064 _shared_docs[ 

5065 "pipe" 

5066 ] = r""" 

5067 Apply func(self, \*args, \*\*kwargs). 

5068 

5069 Parameters 

5070 ---------- 

5071 func : function 

5072 Function to apply to the %(klass)s. 

5073 ``args``, and ``kwargs`` are passed into ``func``. 

5074 Alternatively a ``(callable, data_keyword)`` tuple where 

5075 ``data_keyword`` is a string indicating the keyword of 

5076 ``callable`` that expects the %(klass)s. 

5077 args : iterable, optional 

5078 Positional arguments passed into ``func``. 

5079 kwargs : mapping, optional 

5080 A dictionary of keyword arguments passed into ``func``. 

5081 

5082 Returns 

5083 ------- 

5084 object : the return type of ``func``. 

5085 

5086 See Also 

5087 -------- 

5088 DataFrame.apply 

5089 DataFrame.applymap 

5090 Series.map 

5091 

5092 Notes 

5093 ----- 

5094 

5095 Use ``.pipe`` when chaining together functions that expect 

5096 Series, DataFrames or GroupBy objects. Instead of writing 

5097 

5098 >>> f(g(h(df), arg1=a), arg2=b, arg3=c) 

5099 

5100 You can write 

5101 

5102 >>> (df.pipe(h) 

5103 ... .pipe(g, arg1=a) 

5104 ... .pipe(f, arg2=b, arg3=c) 

5105 ... ) 

5106 

5107 If you have a function that takes the data as (say) the second 

5108 argument, pass a tuple indicating which keyword expects the 

5109 data. For example, suppose ``f`` takes its data as ``arg2``: 

5110 

5111 >>> (df.pipe(h) 

5112 ... .pipe(g, arg1=a) 

5113 ... .pipe((f, 'arg2'), arg1=a, arg3=c) 

5114 ... ) 

5115 """ 

5116 

5117 @Appender(_shared_docs["pipe"] % _shared_doc_kwargs) 

5118 def pipe(self, func, *args, **kwargs): 

5119 return com.pipe(self, func, *args, **kwargs) 

5120 

5121 _shared_docs["aggregate"] = dedent( 

5122 """ 

5123 Aggregate using one or more operations over the specified axis. 

5124 %(versionadded)s 

5125 Parameters 

5126 ---------- 

5127 func : function, str, list or dict 

5128 Function to use for aggregating the data. If a function, must either 

5129 work when passed a %(klass)s or when passed to %(klass)s.apply. 

5130 

5131 Accepted combinations are: 

5132 

5133 - function 

5134 - string function name 

5135 - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` 

5136 - dict of axis labels -> functions, function names or list of such. 

5137 %(axis)s 

5138 *args 

5139 Positional arguments to pass to `func`. 

5140 **kwargs 

5141 Keyword arguments to pass to `func`. 

5142 

5143 Returns 

5144 ------- 

5145 scalar, Series or DataFrame 

5146 

5147 The return can be: 

5148 

5149 * scalar : when Series.agg is called with single function 

5150 * Series : when DataFrame.agg is called with a single function 

5151 * DataFrame : when DataFrame.agg is called with several functions 

5152 

5153 Return scalar, Series or DataFrame. 

5154 %(see_also)s 

5155 Notes 

5156 ----- 

5157 `agg` is an alias for `aggregate`. Use the alias. 

5158 

5159 A passed user-defined-function will be passed a Series for evaluation. 

5160 %(examples)s""" 

5161 ) 

5162 

5163 _shared_docs[ 

5164 "transform" 

5165 ] = """ 

5166 Call ``func`` on self producing a %(klass)s with transformed values. 

5167 

5168 Produced %(klass)s will have same axis length as self. 

5169 

5170 Parameters 

5171 ---------- 

5172 func : function, str, list or dict 

5173 Function to use for transforming the data. If a function, must either 

5174 work when passed a %(klass)s or when passed to %(klass)s.apply. 

5175 

5176 Accepted combinations are: 

5177 

5178 - function 

5179 - string function name 

5180 - list of functions and/or function names, e.g. ``[np.exp. 'sqrt']`` 

5181 - dict of axis labels -> functions, function names or list of such. 

5182 %(axis)s 

5183 *args 

5184 Positional arguments to pass to `func`. 

5185 **kwargs 

5186 Keyword arguments to pass to `func`. 

5187 

5188 Returns 

5189 ------- 

5190 %(klass)s 

5191 A %(klass)s that must have the same length as self. 

5192 

5193 Raises 

5194 ------ 

5195 ValueError : If the returned %(klass)s has a different length than self. 

5196 

5197 See Also 

5198 -------- 

5199 %(klass)s.agg : Only perform aggregating type operations. 

5200 %(klass)s.apply : Invoke function on a %(klass)s. 

5201 

5202 Examples 

5203 -------- 

5204 >>> df = pd.DataFrame({'A': range(3), 'B': range(1, 4)}) 

5205 >>> df 

5206 A B 

5207 0 0 1 

5208 1 1 2 

5209 2 2 3 

5210 >>> df.transform(lambda x: x + 1) 

5211 A B 

5212 0 1 2 

5213 1 2 3 

5214 2 3 4 

5215 

5216 Even though the resulting %(klass)s must have the same length as the 

5217 input %(klass)s, it is possible to provide several input functions: 

5218 

5219 >>> s = pd.Series(range(3)) 

5220 >>> s 

5221 0 0 

5222 1 1 

5223 2 2 

5224 dtype: int64 

5225 >>> s.transform([np.sqrt, np.exp]) 

5226 sqrt exp 

5227 0 0.000000 1.000000 

5228 1 1.000000 2.718282 

5229 2 1.414214 7.389056 

5230 """ 

5231 

5232 # ---------------------------------------------------------------------- 

5233 # Attribute access 

5234 

5235 def __finalize__( 

5236 self: FrameOrSeries, other, method=None, **kwargs 

5237 ) -> FrameOrSeries: 

5238 """ 

5239 Propagate metadata from other to self. 

5240 

5241 Parameters 

5242 ---------- 

5243 other : the object from which to get the attributes that we are going 

5244 to propagate 

5245 method : optional, a passed method name ; possibly to take different 

5246 types of propagation actions based on this 

5247 

5248 """ 

5249 if isinstance(other, NDFrame): 

5250 for name in other.attrs: 

5251 self.attrs[name] = other.attrs[name] 

5252 # For subclasses using _metadata. 

5253 for name in self._metadata: 

5254 object.__setattr__(self, name, getattr(other, name, None)) 

5255 return self 

5256 

5257 def __getattr__(self, name: str): 

5258 """After regular attribute access, try looking up the name 

5259 This allows simpler access to columns for interactive use. 

5260 """ 

5261 

5262 # Note: obj.x will always call obj.__getattribute__('x') prior to 

5263 # calling obj.__getattr__('x'). 

5264 

5265 if ( 

5266 name in self._internal_names_set 

5267 or name in self._metadata 

5268 or name in self._accessors 

5269 ): 

5270 return object.__getattribute__(self, name) 

5271 else: 

5272 if self._info_axis._can_hold_identifiers_and_holds_name(name): 

5273 return self[name] 

5274 return object.__getattribute__(self, name) 

5275 

5276 def __setattr__(self, name: str, value) -> None: 

5277 """After regular attribute access, try setting the name 

5278 This allows simpler access to columns for interactive use. 

5279 """ 

5280 

5281 # first try regular attribute access via __getattribute__, so that 

5282 # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify 

5283 # the same attribute. 

5284 

5285 try: 

5286 object.__getattribute__(self, name) 

5287 return object.__setattr__(self, name, value) 

5288 except AttributeError: 

5289 pass 

5290 

5291 # if this fails, go on to more involved attribute setting 

5292 # (note that this matches __getattr__, above). 

5293 if name in self._internal_names_set: 

5294 object.__setattr__(self, name, value) 

5295 elif name in self._metadata: 

5296 object.__setattr__(self, name, value) 

5297 else: 

5298 try: 

5299 existing = getattr(self, name) 

5300 if isinstance(existing, Index): 

5301 object.__setattr__(self, name, value) 

5302 elif name in self._info_axis: 

5303 self[name] = value 

5304 else: 

5305 object.__setattr__(self, name, value) 

5306 except (AttributeError, TypeError): 

5307 if isinstance(self, ABCDataFrame) and (is_list_like(value)): 

5308 warnings.warn( 

5309 "Pandas doesn't allow columns to be " 

5310 "created via a new attribute name - see " 

5311 "https://pandas.pydata.org/pandas-docs/" 

5312 "stable/indexing.html#attribute-access", 

5313 stacklevel=2, 

5314 ) 

5315 object.__setattr__(self, name, value) 

5316 

5317 def _dir_additions(self): 

5318 """ add the string-like attributes from the info_axis. 

5319 If info_axis is a MultiIndex, it's first level values are used. 

5320 """ 

5321 additions = { 

5322 c 

5323 for c in self._info_axis.unique(level=0)[:100] 

5324 if isinstance(c, str) and c.isidentifier() 

5325 } 

5326 return super()._dir_additions().union(additions) 

5327 

5328 # ---------------------------------------------------------------------- 

5329 # Consolidation of internals 

5330 

5331 def _protect_consolidate(self, f): 

5332 """Consolidate _data -- if the blocks have changed, then clear the 

5333 cache 

5334 """ 

5335 blocks_before = len(self._data.blocks) 

5336 result = f() 

5337 if len(self._data.blocks) != blocks_before: 

5338 self._clear_item_cache() 

5339 return result 

5340 

5341 def _consolidate_inplace(self) -> None: 

5342 """Consolidate data in place and return None""" 

5343 

5344 def f(): 

5345 self._data = self._data.consolidate() 

5346 

5347 self._protect_consolidate(f) 

5348 

5349 def _consolidate(self, inplace: bool_t = False): 

5350 """ 

5351 Compute NDFrame with "consolidated" internals (data of each dtype 

5352 grouped together in a single ndarray). 

5353 

5354 Parameters 

5355 ---------- 

5356 inplace : bool, default False 

5357 If False return new object, otherwise modify existing object. 

5358 

5359 Returns 

5360 ------- 

5361 consolidated : same type as caller 

5362 """ 

5363 inplace = validate_bool_kwarg(inplace, "inplace") 

5364 if inplace: 

5365 self._consolidate_inplace() 

5366 else: 

5367 f = lambda: self._data.consolidate() 

5368 cons_data = self._protect_consolidate(f) 

5369 return self._constructor(cons_data).__finalize__(self) 

5370 

5371 @property 

5372 def _is_mixed_type(self): 

5373 f = lambda: self._data.is_mixed_type 

5374 return self._protect_consolidate(f) 

5375 

5376 @property 

5377 def _is_numeric_mixed_type(self): 

5378 f = lambda: self._data.is_numeric_mixed_type 

5379 return self._protect_consolidate(f) 

5380 

5381 @property 

5382 def _is_datelike_mixed_type(self): 

5383 f = lambda: self._data.is_datelike_mixed_type 

5384 return self._protect_consolidate(f) 

5385 

5386 def _check_inplace_setting(self, value) -> bool_t: 

5387 """ check whether we allow in-place setting with this type of value """ 

5388 

5389 if self._is_mixed_type: 

5390 if not self._is_numeric_mixed_type: 

5391 

5392 # allow an actual np.nan thru 

5393 if is_float(value) and np.isnan(value): 

5394 return True 

5395 

5396 raise TypeError( 

5397 "Cannot do inplace boolean setting on " 

5398 "mixed-types with a non np.nan value" 

5399 ) 

5400 

5401 return True 

5402 

5403 def _get_numeric_data(self): 

5404 return self._constructor(self._data.get_numeric_data()).__finalize__(self) 

5405 

5406 def _get_bool_data(self): 

5407 return self._constructor(self._data.get_bool_data()).__finalize__(self) 

5408 

5409 # ---------------------------------------------------------------------- 

5410 # Internal Interface Methods 

5411 

5412 @property 

5413 def values(self) -> np.ndarray: 

5414 """ 

5415 Return a Numpy representation of the DataFrame. 

5416 

5417 .. warning:: 

5418 

5419 We recommend using :meth:`DataFrame.to_numpy` instead. 

5420 

5421 Only the values in the DataFrame will be returned, the axes labels 

5422 will be removed. 

5423 

5424 Returns 

5425 ------- 

5426 numpy.ndarray 

5427 The values of the DataFrame. 

5428 

5429 See Also 

5430 -------- 

5431 DataFrame.to_numpy : Recommended alternative to this method. 

5432 DataFrame.index : Retrieve the index labels. 

5433 DataFrame.columns : Retrieving the column names. 

5434 

5435 Notes 

5436 ----- 

5437 The dtype will be a lower-common-denominator dtype (implicit 

5438 upcasting); that is to say if the dtypes (even of numeric types) 

5439 are mixed, the one that accommodates all will be chosen. Use this 

5440 with care if you are not dealing with the blocks. 

5441 

5442 e.g. If the dtypes are float16 and float32, dtype will be upcast to 

5443 float32. If dtypes are int32 and uint8, dtype will be upcast to 

5444 int32. By :func:`numpy.find_common_type` convention, mixing int64 

5445 and uint64 will result in a float64 dtype. 

5446 

5447 Examples 

5448 -------- 

5449 A DataFrame where all columns are the same type (e.g., int64) results 

5450 in an array of the same type. 

5451 

5452 >>> df = pd.DataFrame({'age': [ 3, 29], 

5453 ... 'height': [94, 170], 

5454 ... 'weight': [31, 115]}) 

5455 >>> df 

5456 age height weight 

5457 0 3 94 31 

5458 1 29 170 115 

5459 >>> df.dtypes 

5460 age int64 

5461 height int64 

5462 weight int64 

5463 dtype: object 

5464 >>> df.values 

5465 array([[ 3, 94, 31], 

5466 [ 29, 170, 115]], dtype=int64) 

5467 

5468 A DataFrame with mixed type columns(e.g., str/object, int64, float32) 

5469 results in an ndarray of the broadest type that accommodates these 

5470 mixed types (e.g., object). 

5471 

5472 >>> df2 = pd.DataFrame([('parrot', 24.0, 'second'), 

5473 ... ('lion', 80.5, 1), 

5474 ... ('monkey', np.nan, None)], 

5475 ... columns=('name', 'max_speed', 'rank')) 

5476 >>> df2.dtypes 

5477 name object 

5478 max_speed float64 

5479 rank object 

5480 dtype: object 

5481 >>> df2.values 

5482 array([['parrot', 24.0, 'second'], 

5483 ['lion', 80.5, 1], 

5484 ['monkey', nan, None]], dtype=object) 

5485 """ 

5486 self._consolidate_inplace() 

5487 return self._data.as_array(transpose=self._AXIS_REVERSED) 

5488 

5489 @property 

5490 def _values(self) -> np.ndarray: 

5491 """internal implementation""" 

5492 return self.values 

5493 

5494 @property 

5495 def _get_values(self) -> np.ndarray: 

5496 # compat 

5497 return self.values 

5498 

5499 def _internal_get_values(self) -> np.ndarray: 

5500 """ 

5501 Return an ndarray after converting sparse values to dense. 

5502 

5503 This is the same as ``.values`` for non-sparse data. For sparse 

5504 data contained in a `SparseArray`, the data are first 

5505 converted to a dense representation. 

5506 

5507 Returns 

5508 ------- 

5509 numpy.ndarray 

5510 Numpy representation of DataFrame. 

5511 

5512 See Also 

5513 -------- 

5514 values : Numpy representation of DataFrame. 

5515 SparseArray : Container for sparse data. 

5516 """ 

5517 return self.values 

5518 

5519 @property 

5520 def dtypes(self): 

5521 """ 

5522 Return the dtypes in the DataFrame. 

5523 

5524 This returns a Series with the data type of each column. 

5525 The result's index is the original DataFrame's columns. Columns 

5526 with mixed types are stored with the ``object`` dtype. See 

5527 :ref:`the User Guide <basics.dtypes>` for more. 

5528 

5529 Returns 

5530 ------- 

5531 pandas.Series 

5532 The data type of each column. 

5533 

5534 Examples 

5535 -------- 

5536 >>> df = pd.DataFrame({'float': [1.0], 

5537 ... 'int': [1], 

5538 ... 'datetime': [pd.Timestamp('20180310')], 

5539 ... 'string': ['foo']}) 

5540 >>> df.dtypes 

5541 float float64 

5542 int int64 

5543 datetime datetime64[ns] 

5544 string object 

5545 dtype: object 

5546 """ 

5547 from pandas import Series 

5548 

5549 return Series(self._data.get_dtypes(), index=self._info_axis, dtype=np.object_) 

5550 

5551 def _to_dict_of_blocks(self, copy: bool_t = True): 

5552 """ 

5553 Return a dict of dtype -> Constructor Types that 

5554 each is a homogeneous dtype. 

5555 

5556 Internal ONLY 

5557 """ 

5558 return { 

5559 k: self._constructor(v).__finalize__(self) 

5560 for k, v, in self._data.to_dict(copy=copy).items() 

5561 } 

5562 

5563 def astype( 

5564 self: FrameOrSeries, dtype, copy: bool_t = True, errors: str = "raise" 

5565 ) -> FrameOrSeries: 

5566 """ 

5567 Cast a pandas object to a specified dtype ``dtype``. 

5568 

5569 Parameters 

5570 ---------- 

5571 dtype : data type, or dict of column name -> data type 

5572 Use a numpy.dtype or Python type to cast entire pandas object to 

5573 the same type. Alternatively, use {col: dtype, ...}, where col is a 

5574 column label and dtype is a numpy.dtype or Python type to cast one 

5575 or more of the DataFrame's columns to column-specific types. 

5576 copy : bool, default True 

5577 Return a copy when ``copy=True`` (be very careful setting 

5578 ``copy=False`` as changes to values then may propagate to other 

5579 pandas objects). 

5580 errors : {'raise', 'ignore'}, default 'raise' 

5581 Control raising of exceptions on invalid data for provided dtype. 

5582 

5583 - ``raise`` : allow exceptions to be raised 

5584 - ``ignore`` : suppress exceptions. On error return original object. 

5585 

5586 Returns 

5587 ------- 

5588 casted : same type as caller 

5589 

5590 See Also 

5591 -------- 

5592 to_datetime : Convert argument to datetime. 

5593 to_timedelta : Convert argument to timedelta. 

5594 to_numeric : Convert argument to a numeric type. 

5595 numpy.ndarray.astype : Cast a numpy array to a specified type. 

5596 

5597 Examples 

5598 -------- 

5599 Create a DataFrame: 

5600 

5601 >>> d = {'col1': [1, 2], 'col2': [3, 4]} 

5602 >>> df = pd.DataFrame(data=d) 

5603 >>> df.dtypes 

5604 col1 int64 

5605 col2 int64 

5606 dtype: object 

5607 

5608 Cast all columns to int32: 

5609 

5610 >>> df.astype('int32').dtypes 

5611 col1 int32 

5612 col2 int32 

5613 dtype: object 

5614 

5615 Cast col1 to int32 using a dictionary: 

5616 

5617 >>> df.astype({'col1': 'int32'}).dtypes 

5618 col1 int32 

5619 col2 int64 

5620 dtype: object 

5621 

5622 Create a series: 

5623 

5624 >>> ser = pd.Series([1, 2], dtype='int32') 

5625 >>> ser 

5626 0 1 

5627 1 2 

5628 dtype: int32 

5629 >>> ser.astype('int64') 

5630 0 1 

5631 1 2 

5632 dtype: int64 

5633 

5634 Convert to categorical type: 

5635 

5636 >>> ser.astype('category') 

5637 0 1 

5638 1 2 

5639 dtype: category 

5640 Categories (2, int64): [1, 2] 

5641 

5642 Convert to ordered categorical type with custom ordering: 

5643 

5644 >>> cat_dtype = pd.api.types.CategoricalDtype( 

5645 ... categories=[2, 1], ordered=True) 

5646 >>> ser.astype(cat_dtype) 

5647 0 1 

5648 1 2 

5649 dtype: category 

5650 Categories (2, int64): [2 < 1] 

5651 

5652 Note that using ``copy=False`` and changing data on a new 

5653 pandas object may propagate changes: 

5654 

5655 >>> s1 = pd.Series([1, 2]) 

5656 >>> s2 = s1.astype('int64', copy=False) 

5657 >>> s2[0] = 10 

5658 >>> s1 # note that s1[0] has changed too 

5659 0 10 

5660 1 2 

5661 dtype: int64 

5662 """ 

5663 if is_dict_like(dtype): 

5664 if self.ndim == 1: # i.e. Series 

5665 if len(dtype) > 1 or self.name not in dtype: 

5666 raise KeyError( 

5667 "Only the Series name can be used for " 

5668 "the key in Series dtype mappings." 

5669 ) 

5670 new_type = dtype[self.name] 

5671 return self.astype(new_type, copy, errors) 

5672 

5673 for col_name in dtype.keys(): 

5674 if col_name not in self: 

5675 raise KeyError( 

5676 "Only a column name can be used for the " 

5677 "key in a dtype mappings argument." 

5678 ) 

5679 results = [] 

5680 for col_name, col in self.items(): 

5681 if col_name in dtype: 

5682 results.append( 

5683 col.astype(dtype=dtype[col_name], copy=copy, errors=errors) 

5684 ) 

5685 else: 

5686 results.append(col.copy() if copy else col) 

5687 

5688 elif is_extension_array_dtype(dtype) and self.ndim > 1: 

5689 # GH 18099/22869: columnwise conversion to extension dtype 

5690 # GH 24704: use iloc to handle duplicate column names 

5691 results = [ 

5692 self.iloc[:, i].astype(dtype, copy=copy) 

5693 for i in range(len(self.columns)) 

5694 ] 

5695 

5696 else: 

5697 # else, only a single dtype is given 

5698 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors) 

5699 return self._constructor(new_data).__finalize__(self) 

5700 

5701 # GH 19920: retain column metadata after concat 

5702 result = pd.concat(results, axis=1, copy=False) 

5703 result.columns = self.columns 

5704 return result 

5705 

5706 def copy(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries: 

5707 """ 

5708 Make a copy of this object's indices and data. 

5709 

5710 When ``deep=True`` (default), a new object will be created with a 

5711 copy of the calling object's data and indices. Modifications to 

5712 the data or indices of the copy will not be reflected in the 

5713 original object (see notes below). 

5714 

5715 When ``deep=False``, a new object will be created without copying 

5716 the calling object's data or index (only references to the data 

5717 and index are copied). Any changes to the data of the original 

5718 will be reflected in the shallow copy (and vice versa). 

5719 

5720 Parameters 

5721 ---------- 

5722 deep : bool, default True 

5723 Make a deep copy, including a copy of the data and the indices. 

5724 With ``deep=False`` neither the indices nor the data are copied. 

5725 

5726 Returns 

5727 ------- 

5728 copy : Series or DataFrame 

5729 Object type matches caller. 

5730 

5731 Notes 

5732 ----- 

5733 When ``deep=True``, data is copied but actual Python objects 

5734 will not be copied recursively, only the reference to the object. 

5735 This is in contrast to `copy.deepcopy` in the Standard Library, 

5736 which recursively copies object data (see examples below). 

5737 

5738 While ``Index`` objects are copied when ``deep=True``, the underlying 

5739 numpy array is not copied for performance reasons. Since ``Index`` is 

5740 immutable, the underlying data can be safely shared and a copy 

5741 is not needed. 

5742 

5743 Examples 

5744 -------- 

5745 >>> s = pd.Series([1, 2], index=["a", "b"]) 

5746 >>> s 

5747 a 1 

5748 b 2 

5749 dtype: int64 

5750 

5751 >>> s_copy = s.copy() 

5752 >>> s_copy 

5753 a 1 

5754 b 2 

5755 dtype: int64 

5756 

5757 **Shallow copy versus default (deep) copy:** 

5758 

5759 >>> s = pd.Series([1, 2], index=["a", "b"]) 

5760 >>> deep = s.copy() 

5761 >>> shallow = s.copy(deep=False) 

5762 

5763 Shallow copy shares data and index with original. 

5764 

5765 >>> s is shallow 

5766 False 

5767 >>> s.values is shallow.values and s.index is shallow.index 

5768 True 

5769 

5770 Deep copy has own copy of data and index. 

5771 

5772 >>> s is deep 

5773 False 

5774 >>> s.values is deep.values or s.index is deep.index 

5775 False 

5776 

5777 Updates to the data shared by shallow copy and original is reflected 

5778 in both; deep copy remains unchanged. 

5779 

5780 >>> s[0] = 3 

5781 >>> shallow[1] = 4 

5782 >>> s 

5783 a 3 

5784 b 4 

5785 dtype: int64 

5786 >>> shallow 

5787 a 3 

5788 b 4 

5789 dtype: int64 

5790 >>> deep 

5791 a 1 

5792 b 2 

5793 dtype: int64 

5794 

5795 Note that when copying an object containing Python objects, a deep copy 

5796 will copy the data, but will not do so recursively. Updating a nested 

5797 data object will be reflected in the deep copy. 

5798 

5799 >>> s = pd.Series([[1, 2], [3, 4]]) 

5800 >>> deep = s.copy() 

5801 >>> s[0][0] = 10 

5802 >>> s 

5803 0 [10, 2] 

5804 1 [3, 4] 

5805 dtype: object 

5806 >>> deep 

5807 0 [10, 2] 

5808 1 [3, 4] 

5809 dtype: object 

5810 """ 

5811 data = self._data.copy(deep=deep) 

5812 return self._constructor(data).__finalize__(self) 

5813 

5814 def __copy__(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries: 

5815 return self.copy(deep=deep) 

5816 

5817 def __deepcopy__(self: FrameOrSeries, memo=None) -> FrameOrSeries: 

5818 """ 

5819 Parameters 

5820 ---------- 

5821 memo, default None 

5822 Standard signature. Unused 

5823 """ 

5824 return self.copy(deep=True) 

5825 

5826 def _convert( 

5827 self: FrameOrSeries, 

5828 datetime: bool_t = False, 

5829 numeric: bool_t = False, 

5830 timedelta: bool_t = False, 

5831 coerce: bool_t = False, 

5832 copy: bool_t = True, 

5833 ) -> FrameOrSeries: 

5834 """ 

5835 Attempt to infer better dtype for object columns 

5836 

5837 Parameters 

5838 ---------- 

5839 datetime : bool, default False 

5840 If True, convert to date where possible. 

5841 numeric : bool, default False 

5842 If True, attempt to convert to numbers (including strings), with 

5843 unconvertible values becoming NaN. 

5844 timedelta : bool, default False 

5845 If True, convert to timedelta where possible. 

5846 coerce : bool, default False 

5847 If True, force conversion with unconvertible values converted to 

5848 nulls (NaN or NaT). 

5849 copy : bool, default True 

5850 If True, return a copy even if no copy is necessary (e.g. no 

5851 conversion was done). Note: This is meant for internal use, and 

5852 should not be confused with inplace. 

5853 

5854 Returns 

5855 ------- 

5856 converted : same as input object 

5857 """ 

5858 validate_bool_kwarg(datetime, "datetime") 

5859 validate_bool_kwarg(numeric, "numeric") 

5860 validate_bool_kwarg(timedelta, "timedelta") 

5861 validate_bool_kwarg(coerce, "coerce") 

5862 validate_bool_kwarg(copy, "copy") 

5863 return self._constructor( 

5864 self._data.convert( 

5865 datetime=datetime, 

5866 numeric=numeric, 

5867 timedelta=timedelta, 

5868 coerce=coerce, 

5869 copy=copy, 

5870 ) 

5871 ).__finalize__(self) 

5872 

5873 def infer_objects(self: FrameOrSeries) -> FrameOrSeries: 

5874 """ 

5875 Attempt to infer better dtypes for object columns. 

5876 

5877 Attempts soft conversion of object-dtyped 

5878 columns, leaving non-object and unconvertible 

5879 columns unchanged. The inference rules are the 

5880 same as during normal Series/DataFrame construction. 

5881 

5882 .. versionadded:: 0.21.0 

5883 

5884 Returns 

5885 ------- 

5886 converted : same type as input object 

5887 

5888 See Also 

5889 -------- 

5890 to_datetime : Convert argument to datetime. 

5891 to_timedelta : Convert argument to timedelta. 

5892 to_numeric : Convert argument to numeric type. 

5893 convert_dtypes : Convert argument to best possible dtype. 

5894 

5895 Examples 

5896 -------- 

5897 >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]}) 

5898 >>> df = df.iloc[1:] 

5899 >>> df 

5900 A 

5901 1 1 

5902 2 2 

5903 3 3 

5904 

5905 >>> df.dtypes 

5906 A object 

5907 dtype: object 

5908 

5909 >>> df.infer_objects().dtypes 

5910 A int64 

5911 dtype: object 

5912 """ 

5913 # numeric=False necessary to only soft convert; 

5914 # python objects will still be converted to 

5915 # native numpy numeric types 

5916 return self._constructor( 

5917 self._data.convert( 

5918 datetime=True, numeric=False, timedelta=True, coerce=False, copy=True 

5919 ) 

5920 ).__finalize__(self) 

5921 

5922 def convert_dtypes( 

5923 self: FrameOrSeries, 

5924 infer_objects: bool_t = True, 

5925 convert_string: bool_t = True, 

5926 convert_integer: bool_t = True, 

5927 convert_boolean: bool_t = True, 

5928 ) -> FrameOrSeries: 

5929 """ 

5930 Convert columns to best possible dtypes using dtypes supporting ``pd.NA``. 

5931 

5932 .. versionadded:: 1.0.0 

5933 

5934 Parameters 

5935 ---------- 

5936 infer_objects : bool, default True 

5937 Whether object dtypes should be converted to the best possible types. 

5938 convert_string : bool, default True 

5939 Whether object dtypes should be converted to ``StringDtype()``. 

5940 convert_integer : bool, default True 

5941 Whether, if possible, conversion can be done to integer extension types. 

5942 convert_boolean : bool, defaults True 

5943 Whether object dtypes should be converted to ``BooleanDtypes()``. 

5944 

5945 Returns 

5946 ------- 

5947 Series or DataFrame 

5948 Copy of input object with new dtype. 

5949 

5950 See Also 

5951 -------- 

5952 infer_objects : Infer dtypes of objects. 

5953 to_datetime : Convert argument to datetime. 

5954 to_timedelta : Convert argument to timedelta. 

5955 to_numeric : Convert argument to a numeric type. 

5956 

5957 Notes 

5958 ----- 

5959 

5960 By default, ``convert_dtypes`` will attempt to convert a Series (or each 

5961 Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options 

5962 ``convert_string``, ``convert_integer``, and ``convert_boolean``, it is 

5963 possible to turn off individual conversions to ``StringDtype``, the integer 

5964 extension types or ``BooleanDtype``, respectively. 

5965 

5966 For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference 

5967 rules as during normal Series/DataFrame construction. Then, if possible, 

5968 convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer extension 

5969 type, otherwise leave as ``object``. 

5970 

5971 If the dtype is integer, convert to an appropriate integer extension type. 

5972 

5973 If the dtype is numeric, and consists of all integers, convert to an 

5974 appropriate integer extension type. 

5975 

5976 In the future, as new dtypes are added that support ``pd.NA``, the results 

5977 of this method will change to support those new dtypes. 

5978 

5979 Examples 

5980 -------- 

5981 >>> df = pd.DataFrame( 

5982 ... { 

5983 ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), 

5984 ... "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")), 

5985 ... "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")), 

5986 ... "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")), 

5987 ... "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")), 

5988 ... "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")), 

5989 ... } 

5990 ... ) 

5991 

5992 Start with a DataFrame with default dtypes. 

5993 

5994 >>> df 

5995 a b c d e f 

5996 0 1 x True h 10.0 NaN 

5997 1 2 y False i NaN 100.5 

5998 2 3 z NaN NaN 20.0 200.0 

5999 

6000 >>> df.dtypes 

6001 a int32 

6002 b object 

6003 c object 

6004 d object 

6005 e float64 

6006 f float64 

6007 dtype: object 

6008 

6009 Convert the DataFrame to use best possible dtypes. 

6010 

6011 >>> dfn = df.convert_dtypes() 

6012 >>> dfn 

6013 a b c d e f 

6014 0 1 x True h 10 NaN 

6015 1 2 y False i <NA> 100.5 

6016 2 3 z <NA> <NA> 20 200.0 

6017 

6018 >>> dfn.dtypes 

6019 a Int32 

6020 b string 

6021 c boolean 

6022 d string 

6023 e Int64 

6024 f float64 

6025 dtype: object 

6026 

6027 Start with a Series of strings and missing data represented by ``np.nan``. 

6028 

6029 >>> s = pd.Series(["a", "b", np.nan]) 

6030 >>> s 

6031 0 a 

6032 1 b 

6033 2 NaN 

6034 dtype: object 

6035 

6036 Obtain a Series with dtype ``StringDtype``. 

6037 

6038 >>> s.convert_dtypes() 

6039 0 a 

6040 1 b 

6041 2 <NA> 

6042 dtype: string 

6043 """ 

6044 if self.ndim == 1: 

6045 return self._convert_dtypes( 

6046 infer_objects, convert_string, convert_integer, convert_boolean 

6047 ) 

6048 else: 

6049 results = [ 

6050 col._convert_dtypes( 

6051 infer_objects, convert_string, convert_integer, convert_boolean 

6052 ) 

6053 for col_name, col in self.items() 

6054 ] 

6055 result = pd.concat(results, axis=1, copy=False) 

6056 return result 

6057 

6058 # ---------------------------------------------------------------------- 

6059 # Filling NA's 

6060 

6061 def fillna( 

6062 self: FrameOrSeries, 

6063 value=None, 

6064 method=None, 

6065 axis=None, 

6066 inplace: bool_t = False, 

6067 limit=None, 

6068 downcast=None, 

6069 ) -> Optional[FrameOrSeries]: 

6070 """ 

6071 Fill NA/NaN values using the specified method. 

6072 

6073 Parameters 

6074 ---------- 

6075 value : scalar, dict, Series, or DataFrame 

6076 Value to use to fill holes (e.g. 0), alternately a 

6077 dict/Series/DataFrame of values specifying which value to use for 

6078 each index (for a Series) or column (for a DataFrame). Values not 

6079 in the dict/Series/DataFrame will not be filled. This value cannot 

6080 be a list. 

6081 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None 

6082 Method to use for filling holes in reindexed Series 

6083 pad / ffill: propagate last valid observation forward to next valid 

6084 backfill / bfill: use next valid observation to fill gap. 

6085 axis : %(axes_single_arg)s 

6086 Axis along which to fill missing values. 

6087 inplace : bool, default False 

6088 If True, fill in-place. Note: this will modify any 

6089 other views on this object (e.g., a no-copy slice for a column in a 

6090 DataFrame). 

6091 limit : int, default None 

6092 If method is specified, this is the maximum number of consecutive 

6093 NaN values to forward/backward fill. In other words, if there is 

6094 a gap with more than this number of consecutive NaNs, it will only 

6095 be partially filled. If method is not specified, this is the 

6096 maximum number of entries along the entire axis where NaNs will be 

6097 filled. Must be greater than 0 if not None. 

6098 downcast : dict, default is None 

6099 A dict of item->dtype of what to downcast if possible, 

6100 or the string 'infer' which will try to downcast to an appropriate 

6101 equal type (e.g. float64 to int64 if possible). 

6102 

6103 Returns 

6104 ------- 

6105 %(klass)s or None 

6106 Object with missing values filled or None if ``inplace=True``. 

6107 

6108 See Also 

6109 -------- 

6110 interpolate : Fill NaN values using interpolation. 

6111 reindex : Conform object to new index. 

6112 asfreq : Convert TimeSeries to specified frequency. 

6113 

6114 Examples 

6115 -------- 

6116 >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], 

6117 ... [3, 4, np.nan, 1], 

6118 ... [np.nan, np.nan, np.nan, 5], 

6119 ... [np.nan, 3, np.nan, 4]], 

6120 ... columns=list('ABCD')) 

6121 >>> df 

6122 A B C D 

6123 0 NaN 2.0 NaN 0 

6124 1 3.0 4.0 NaN 1 

6125 2 NaN NaN NaN 5 

6126 3 NaN 3.0 NaN 4 

6127 

6128 Replace all NaN elements with 0s. 

6129 

6130 >>> df.fillna(0) 

6131 A B C D 

6132 0 0.0 2.0 0.0 0 

6133 1 3.0 4.0 0.0 1 

6134 2 0.0 0.0 0.0 5 

6135 3 0.0 3.0 0.0 4 

6136 

6137 We can also propagate non-null values forward or backward. 

6138 

6139 >>> df.fillna(method='ffill') 

6140 A B C D 

6141 0 NaN 2.0 NaN 0 

6142 1 3.0 4.0 NaN 1 

6143 2 3.0 4.0 NaN 5 

6144 3 3.0 3.0 NaN 4 

6145 

6146 Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1, 

6147 2, and 3 respectively. 

6148 

6149 >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3} 

6150 >>> df.fillna(value=values) 

6151 A B C D 

6152 0 0.0 2.0 2.0 0 

6153 1 3.0 4.0 2.0 1 

6154 2 0.0 1.0 2.0 5 

6155 3 0.0 3.0 2.0 4 

6156 

6157 Only replace the first NaN element. 

6158 

6159 >>> df.fillna(value=values, limit=1) 

6160 A B C D 

6161 0 0.0 2.0 2.0 0 

6162 1 3.0 4.0 NaN 1 

6163 2 NaN 1.0 NaN 5 

6164 3 NaN 3.0 NaN 4 

6165 """ 

6166 inplace = validate_bool_kwarg(inplace, "inplace") 

6167 value, method = validate_fillna_kwargs(value, method) 

6168 

6169 self._consolidate_inplace() 

6170 

6171 # set the default here, so functions examining the signaure 

6172 # can detect if something was set (e.g. in groupby) (GH9221) 

6173 if axis is None: 

6174 axis = 0 

6175 axis = self._get_axis_number(axis) 

6176 

6177 if value is None: 

6178 

6179 if self._is_mixed_type and axis == 1: 

6180 if inplace: 

6181 raise NotImplementedError() 

6182 result = self.T.fillna(method=method, limit=limit).T 

6183 

6184 # need to downcast here because of all of the transposes 

6185 result._data = result._data.downcast() 

6186 

6187 return result 

6188 

6189 new_data = self._data.interpolate( 

6190 method=method, 

6191 axis=axis, 

6192 limit=limit, 

6193 inplace=inplace, 

6194 coerce=True, 

6195 downcast=downcast, 

6196 ) 

6197 else: 

6198 if len(self._get_axis(axis)) == 0: 

6199 return self 

6200 

6201 if self.ndim == 1: 

6202 if isinstance(value, (dict, ABCSeries)): 

6203 value = create_series_with_explicit_dtype( 

6204 value, dtype_if_empty=object 

6205 ) 

6206 elif not is_list_like(value): 

6207 pass 

6208 else: 

6209 raise TypeError( 

6210 '"value" parameter must be a scalar, dict ' 

6211 "or Series, but you passed a " 

6212 f'"{type(value).__name__}"' 

6213 ) 

6214 

6215 new_data = self._data.fillna( 

6216 value=value, limit=limit, inplace=inplace, downcast=downcast 

6217 ) 

6218 

6219 elif isinstance(value, (dict, ABCSeries)): 

6220 if axis == 1: 

6221 raise NotImplementedError( 

6222 "Currently only can fill " 

6223 "with dict/Series column " 

6224 "by column" 

6225 ) 

6226 

6227 result = self if inplace else self.copy() 

6228 for k, v in value.items(): 

6229 if k not in result: 

6230 continue 

6231 obj = result[k] 

6232 obj.fillna(v, limit=limit, inplace=True, downcast=downcast) 

6233 return result if not inplace else None 

6234 

6235 elif not is_list_like(value): 

6236 new_data = self._data.fillna( 

6237 value=value, limit=limit, inplace=inplace, downcast=downcast 

6238 ) 

6239 elif isinstance(value, ABCDataFrame) and self.ndim == 2: 

6240 new_data = self.where(self.notna(), value) 

6241 else: 

6242 raise ValueError(f"invalid fill value with a {type(value)}") 

6243 

6244 if inplace: 

6245 self._update_inplace(new_data) 

6246 return None 

6247 else: 

6248 return self._constructor(new_data).__finalize__(self) 

6249 

6250 def ffill( 

6251 self: FrameOrSeries, 

6252 axis=None, 

6253 inplace: bool_t = False, 

6254 limit=None, 

6255 downcast=None, 

6256 ) -> Optional[FrameOrSeries]: 

6257 """ 

6258 Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``. 

6259 

6260 Returns 

6261 ------- 

6262 %(klass)s or None 

6263 Object with missing values filled or None if ``inplace=True``. 

6264 """ 

6265 return self.fillna( 

6266 method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast 

6267 ) 

6268 

6269 def bfill( 

6270 self: FrameOrSeries, 

6271 axis=None, 

6272 inplace: bool_t = False, 

6273 limit=None, 

6274 downcast=None, 

6275 ) -> Optional[FrameOrSeries]: 

6276 """ 

6277 Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``. 

6278 

6279 Returns 

6280 ------- 

6281 %(klass)s or None 

6282 Object with missing values filled or None if ``inplace=True``. 

6283 """ 

6284 return self.fillna( 

6285 method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast 

6286 ) 

6287 

6288 _shared_docs[ 

6289 "replace" 

6290 ] = """ 

6291 Replace values given in `to_replace` with `value`. 

6292 

6293 Values of the %(klass)s are replaced with other values dynamically. 

6294 This differs from updating with ``.loc`` or ``.iloc``, which require 

6295 you to specify a location to update with some value. 

6296 

6297 Parameters 

6298 ---------- 

6299 to_replace : str, regex, list, dict, Series, int, float, or None 

6300 How to find the values that will be replaced. 

6301 

6302 * numeric, str or regex: 

6303 

6304 - numeric: numeric values equal to `to_replace` will be 

6305 replaced with `value` 

6306 - str: string exactly matching `to_replace` will be replaced 

6307 with `value` 

6308 - regex: regexs matching `to_replace` will be replaced with 

6309 `value` 

6310 

6311 * list of str, regex, or numeric: 

6312 

6313 - First, if `to_replace` and `value` are both lists, they 

6314 **must** be the same length. 

6315 - Second, if ``regex=True`` then all of the strings in **both** 

6316 lists will be interpreted as regexs otherwise they will match 

6317 directly. This doesn't matter much for `value` since there 

6318 are only a few possible substitution regexes you can use. 

6319 - str, regex and numeric rules apply as above. 

6320 

6321 * dict: 

6322 

6323 - Dicts can be used to specify different replacement values 

6324 for different existing values. For example, 

6325 ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and 

6326 'y' with 'z'. To use a dict in this way the `value` 

6327 parameter should be `None`. 

6328 - For a DataFrame a dict can specify that different values 

6329 should be replaced in different columns. For example, 

6330 ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a' 

6331 and the value 'z' in column 'b' and replaces these values 

6332 with whatever is specified in `value`. The `value` parameter 

6333 should not be ``None`` in this case. You can treat this as a 

6334 special case of passing two lists except that you are 

6335 specifying the column to search in. 

6336 - For a DataFrame nested dictionaries, e.g., 

6337 ``{'a': {'b': np.nan}}``, are read as follows: look in column 

6338 'a' for the value 'b' and replace it with NaN. The `value` 

6339 parameter should be ``None`` to use a nested dict in this 

6340 way. You can nest regular expressions as well. Note that 

6341 column names (the top-level dictionary keys in a nested 

6342 dictionary) **cannot** be regular expressions. 

6343 

6344 * None: 

6345 

6346 - This means that the `regex` argument must be a string, 

6347 compiled regular expression, or list, dict, ndarray or 

6348 Series of such elements. If `value` is also ``None`` then 

6349 this **must** be a nested dictionary or Series. 

6350 

6351 See the examples section for examples of each of these. 

6352 value : scalar, dict, list, str, regex, default None 

6353 Value to replace any values matching `to_replace` with. 

6354 For a DataFrame a dict of values can be used to specify which 

6355 value to use for each column (columns not in the dict will not be 

6356 filled). Regular expressions, strings and lists or dicts of such 

6357 objects are also allowed. 

6358 inplace : bool, default False 

6359 If True, in place. Note: this will modify any 

6360 other views on this object (e.g. a column from a DataFrame). 

6361 Returns the caller if this is True. 

6362 limit : int, default None 

6363 Maximum size gap to forward or backward fill. 

6364 regex : bool or same types as `to_replace`, default False 

6365 Whether to interpret `to_replace` and/or `value` as regular 

6366 expressions. If this is ``True`` then `to_replace` *must* be a 

6367 string. Alternatively, this could be a regular expression or a 

6368 list, dict, or array of regular expressions in which case 

6369 `to_replace` must be ``None``. 

6370 method : {'pad', 'ffill', 'bfill', `None`} 

6371 The method to use when for replacement, when `to_replace` is a 

6372 scalar, list or tuple and `value` is ``None``. 

6373 

6374 .. versionchanged:: 0.23.0 

6375 Added to DataFrame. 

6376 

6377 Returns 

6378 ------- 

6379 %(klass)s 

6380 Object after replacement. 

6381 

6382 Raises 

6383 ------ 

6384 AssertionError 

6385 * If `regex` is not a ``bool`` and `to_replace` is not 

6386 ``None``. 

6387 TypeError 

6388 * If `to_replace` is a ``dict`` and `value` is not a ``list``, 

6389 ``dict``, ``ndarray``, or ``Series`` 

6390 * If `to_replace` is ``None`` and `regex` is not compilable 

6391 into a regular expression or is a list, dict, ndarray, or 

6392 Series. 

6393 * When replacing multiple ``bool`` or ``datetime64`` objects and 

6394 the arguments to `to_replace` does not match the type of the 

6395 value being replaced 

6396 ValueError 

6397 * If a ``list`` or an ``ndarray`` is passed to `to_replace` and 

6398 `value` but they are not the same length. 

6399 

6400 See Also 

6401 -------- 

6402 %(klass)s.fillna : Fill NA values. 

6403 %(klass)s.where : Replace values based on boolean condition. 

6404 Series.str.replace : Simple string replacement. 

6405 

6406 Notes 

6407 ----- 

6408 * Regex substitution is performed under the hood with ``re.sub``. The 

6409 rules for substitution for ``re.sub`` are the same. 

6410 * Regular expressions will only substitute on strings, meaning you 

6411 cannot provide, for example, a regular expression matching floating 

6412 point numbers and expect the columns in your frame that have a 

6413 numeric dtype to be matched. However, if those floating point 

6414 numbers *are* strings, then you can do this. 

6415 * This method has *a lot* of options. You are encouraged to experiment 

6416 and play with this method to gain intuition about how it works. 

6417 * When dict is used as the `to_replace` value, it is like 

6418 key(s) in the dict are the to_replace part and 

6419 value(s) in the dict are the value parameter. 

6420 

6421 Examples 

6422 -------- 

6423 

6424 **Scalar `to_replace` and `value`** 

6425 

6426 >>> s = pd.Series([0, 1, 2, 3, 4]) 

6427 >>> s.replace(0, 5) 

6428 0 5 

6429 1 1 

6430 2 2 

6431 3 3 

6432 4 4 

6433 dtype: int64 

6434 

6435 >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4], 

6436 ... 'B': [5, 6, 7, 8, 9], 

6437 ... 'C': ['a', 'b', 'c', 'd', 'e']}) 

6438 >>> df.replace(0, 5) 

6439 A B C 

6440 0 5 5 a 

6441 1 1 6 b 

6442 2 2 7 c 

6443 3 3 8 d 

6444 4 4 9 e 

6445 

6446 **List-like `to_replace`** 

6447 

6448 >>> df.replace([0, 1, 2, 3], 4) 

6449 A B C 

6450 0 4 5 a 

6451 1 4 6 b 

6452 2 4 7 c 

6453 3 4 8 d 

6454 4 4 9 e 

6455 

6456 >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) 

6457 A B C 

6458 0 4 5 a 

6459 1 3 6 b 

6460 2 2 7 c 

6461 3 1 8 d 

6462 4 4 9 e 

6463 

6464 >>> s.replace([1, 2], method='bfill') 

6465 0 0 

6466 1 3 

6467 2 3 

6468 3 3 

6469 4 4 

6470 dtype: int64 

6471 

6472 **dict-like `to_replace`** 

6473 

6474 >>> df.replace({0: 10, 1: 100}) 

6475 A B C 

6476 0 10 5 a 

6477 1 100 6 b 

6478 2 2 7 c 

6479 3 3 8 d 

6480 4 4 9 e 

6481 

6482 >>> df.replace({'A': 0, 'B': 5}, 100) 

6483 A B C 

6484 0 100 100 a 

6485 1 1 6 b 

6486 2 2 7 c 

6487 3 3 8 d 

6488 4 4 9 e 

6489 

6490 >>> df.replace({'A': {0: 100, 4: 400}}) 

6491 A B C 

6492 0 100 5 a 

6493 1 1 6 b 

6494 2 2 7 c 

6495 3 3 8 d 

6496 4 400 9 e 

6497 

6498 **Regular expression `to_replace`** 

6499 

6500 >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'], 

6501 ... 'B': ['abc', 'bar', 'xyz']}) 

6502 >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) 

6503 A B 

6504 0 new abc 

6505 1 foo new 

6506 2 bait xyz 

6507 

6508 >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True) 

6509 A B 

6510 0 new abc 

6511 1 foo bar 

6512 2 bait xyz 

6513 

6514 >>> df.replace(regex=r'^ba.$', value='new') 

6515 A B 

6516 0 new abc 

6517 1 foo new 

6518 2 bait xyz 

6519 

6520 >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'}) 

6521 A B 

6522 0 new abc 

6523 1 xyz new 

6524 2 bait xyz 

6525 

6526 >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') 

6527 A B 

6528 0 new abc 

6529 1 new new 

6530 2 bait xyz 

6531 

6532 Note that when replacing multiple ``bool`` or ``datetime64`` objects, 

6533 the data types in the `to_replace` parameter must match the data 

6534 type of the value being replaced: 

6535 

6536 >>> df = pd.DataFrame({'A': [True, False, True], 

6537 ... 'B': [False, True, False]}) 

6538 >>> df.replace({'a string': 'new value', True: False}) # raises 

6539 Traceback (most recent call last): 

6540 ... 

6541 TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' 

6542 

6543 This raises a ``TypeError`` because one of the ``dict`` keys is not of 

6544 the correct type for replacement. 

6545 

6546 Compare the behavior of ``s.replace({'a': None})`` and 

6547 ``s.replace('a', None)`` to understand the peculiarities 

6548 of the `to_replace` parameter: 

6549 

6550 >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) 

6551 

6552 When one uses a dict as the `to_replace` value, it is like the 

6553 value(s) in the dict are equal to the `value` parameter. 

6554 ``s.replace({'a': None})`` is equivalent to 

6555 ``s.replace(to_replace={'a': None}, value=None, method=None)``: 

6556 

6557 >>> s.replace({'a': None}) 

6558 0 10 

6559 1 None 

6560 2 None 

6561 3 b 

6562 4 None 

6563 dtype: object 

6564 

6565 When ``value=None`` and `to_replace` is a scalar, list or 

6566 tuple, `replace` uses the method parameter (default 'pad') to do the 

6567 replacement. So this is why the 'a' values are being replaced by 10 

6568 in rows 1 and 2 and 'b' in row 4 in this case. 

6569 The command ``s.replace('a', None)`` is actually equivalent to 

6570 ``s.replace(to_replace='a', value=None, method='pad')``: 

6571 

6572 >>> s.replace('a', None) 

6573 0 10 

6574 1 10 

6575 2 10 

6576 3 b 

6577 4 b 

6578 dtype: object 

6579 """ 

6580 

6581 @Appender(_shared_docs["replace"] % _shared_doc_kwargs) 

6582 def replace( 

6583 self, 

6584 to_replace=None, 

6585 value=None, 

6586 inplace=False, 

6587 limit=None, 

6588 regex=False, 

6589 method="pad", 

6590 ): 

6591 inplace = validate_bool_kwarg(inplace, "inplace") 

6592 if not is_bool(regex) and to_replace is not None: 

6593 raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool") 

6594 

6595 self._consolidate_inplace() 

6596 

6597 if value is None: 

6598 # passing a single value that is scalar like 

6599 # when value is None (GH5319), for compat 

6600 if not is_dict_like(to_replace) and not is_dict_like(regex): 

6601 to_replace = [to_replace] 

6602 

6603 if isinstance(to_replace, (tuple, list)): 

6604 if isinstance(self, ABCDataFrame): 

6605 return self.apply( 

6606 _single_replace, args=(to_replace, method, inplace, limit) 

6607 ) 

6608 return _single_replace(self, to_replace, method, inplace, limit) 

6609 

6610 if not is_dict_like(to_replace): 

6611 if not is_dict_like(regex): 

6612 raise TypeError( 

6613 'If "to_replace" and "value" are both None ' 

6614 'and "to_replace" is not a list, then ' 

6615 "regex must be a mapping" 

6616 ) 

6617 to_replace = regex 

6618 regex = True 

6619 

6620 items = list(to_replace.items()) 

6621 keys, values = zip(*items) if items else ([], []) 

6622 

6623 are_mappings = [is_dict_like(v) for v in values] 

6624 

6625 if any(are_mappings): 

6626 if not all(are_mappings): 

6627 raise TypeError( 

6628 "If a nested mapping is passed, all values " 

6629 "of the top level mapping must be mappings" 

6630 ) 

6631 # passed a nested dict/Series 

6632 to_rep_dict = {} 

6633 value_dict = {} 

6634 

6635 for k, v in items: 

6636 keys, values = list(zip(*v.items())) or ([], []) 

6637 

6638 to_rep_dict[k] = list(keys) 

6639 value_dict[k] = list(values) 

6640 

6641 to_replace, value = to_rep_dict, value_dict 

6642 else: 

6643 to_replace, value = keys, values 

6644 

6645 return self.replace( 

6646 to_replace, value, inplace=inplace, limit=limit, regex=regex 

6647 ) 

6648 else: 

6649 

6650 # need a non-zero len on all axes 

6651 if not self.size: 

6652 return self 

6653 

6654 new_data = self._data 

6655 if is_dict_like(to_replace): 

6656 if is_dict_like(value): # {'A' : NA} -> {'A' : 0} 

6657 res = self if inplace else self.copy() 

6658 for c, src in to_replace.items(): 

6659 if c in value and c in self: 

6660 # object conversion is handled in 

6661 # series.replace which is called recursively 

6662 res[c] = res[c].replace( 

6663 to_replace=src, 

6664 value=value[c], 

6665 inplace=False, 

6666 regex=regex, 

6667 ) 

6668 return None if inplace else res 

6669 

6670 # {'A': NA} -> 0 

6671 elif not is_list_like(value): 

6672 keys = [(k, src) for k, src in to_replace.items() if k in self] 

6673 keys_len = len(keys) - 1 

6674 for i, (k, src) in enumerate(keys): 

6675 convert = i == keys_len 

6676 new_data = new_data.replace( 

6677 to_replace=src, 

6678 value=value, 

6679 filter=[k], 

6680 inplace=inplace, 

6681 regex=regex, 

6682 convert=convert, 

6683 ) 

6684 else: 

6685 raise TypeError("value argument must be scalar, dict, or Series") 

6686 

6687 elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] 

6688 if is_list_like(value): 

6689 if len(to_replace) != len(value): 

6690 raise ValueError( 

6691 f"Replacement lists must match in length. " 

6692 f"Expecting {len(to_replace)} got {len(value)} " 

6693 ) 

6694 

6695 new_data = self._data.replace_list( 

6696 src_list=to_replace, 

6697 dest_list=value, 

6698 inplace=inplace, 

6699 regex=regex, 

6700 ) 

6701 

6702 else: # [NA, ''] -> 0 

6703 new_data = self._data.replace( 

6704 to_replace=to_replace, value=value, inplace=inplace, regex=regex 

6705 ) 

6706 elif to_replace is None: 

6707 if not ( 

6708 is_re_compilable(regex) 

6709 or is_list_like(regex) 

6710 or is_dict_like(regex) 

6711 ): 

6712 raise TypeError( 

6713 f"'regex' must be a string or a compiled regular expression " 

6714 f"or a list or dict of strings or regular expressions, " 

6715 f"you passed a {repr(type(regex).__name__)}" 

6716 ) 

6717 return self.replace( 

6718 regex, value, inplace=inplace, limit=limit, regex=True 

6719 ) 

6720 else: 

6721 

6722 # dest iterable dict-like 

6723 if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} 

6724 new_data = self._data 

6725 

6726 for k, v in value.items(): 

6727 if k in self: 

6728 new_data = new_data.replace( 

6729 to_replace=to_replace, 

6730 value=v, 

6731 filter=[k], 

6732 inplace=inplace, 

6733 regex=regex, 

6734 ) 

6735 

6736 elif not is_list_like(value): # NA -> 0 

6737 new_data = self._data.replace( 

6738 to_replace=to_replace, value=value, inplace=inplace, regex=regex 

6739 ) 

6740 else: 

6741 raise TypeError( 

6742 f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' 

6743 ) 

6744 

6745 if inplace: 

6746 self._update_inplace(new_data) 

6747 else: 

6748 return self._constructor(new_data).__finalize__(self) 

6749 

6750 _shared_docs[ 

6751 "interpolate" 

6752 ] = """ 

6753 Please note that only ``method='linear'`` is supported for 

6754 DataFrame/Series with a MultiIndex. 

6755 

6756 Parameters 

6757 ---------- 

6758 method : str, default 'linear' 

6759 Interpolation technique to use. One of: 

6760 

6761 * 'linear': Ignore the index and treat the values as equally 

6762 spaced. This is the only method supported on MultiIndexes. 

6763 * 'time': Works on daily and higher resolution data to interpolate 

6764 given length of interval. 

6765 * 'index', 'values': use the actual numerical values of the index. 

6766 * 'pad': Fill in NaNs using existing values. 

6767 * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'spline', 

6768 'barycentric', 'polynomial': Passed to 

6769 `scipy.interpolate.interp1d`. These methods use the numerical 

6770 values of the index. Both 'polynomial' and 'spline' require that 

6771 you also specify an `order` (int), e.g. 

6772 ``df.interpolate(method='polynomial', order=5)``. 

6773 * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima': 

6774 Wrappers around the SciPy interpolation methods of similar 

6775 names. See `Notes`. 

6776 * 'from_derivatives': Refers to 

6777 `scipy.interpolate.BPoly.from_derivatives` which 

6778 replaces 'piecewise_polynomial' interpolation method in 

6779 scipy 0.18. 

6780 axis : {0 or 'index', 1 or 'columns', None}, default None 

6781 Axis to interpolate along. 

6782 limit : int, optional 

6783 Maximum number of consecutive NaNs to fill. Must be greater than 

6784 0. 

6785 inplace : bool, default False 

6786 Update the data in place if possible. 

6787 limit_direction : {'forward', 'backward', 'both'}, default 'forward' 

6788 If limit is specified, consecutive NaNs will be filled in this 

6789 direction. 

6790 limit_area : {`None`, 'inside', 'outside'}, default None 

6791 If limit is specified, consecutive NaNs will be filled with this 

6792 restriction. 

6793 

6794 * ``None``: No fill restriction. 

6795 * 'inside': Only fill NaNs surrounded by valid values 

6796 (interpolate). 

6797 * 'outside': Only fill NaNs outside valid values (extrapolate). 

6798 

6799 .. versionadded:: 0.23.0 

6800 

6801 downcast : optional, 'infer' or None, defaults to None 

6802 Downcast dtypes if possible. 

6803 **kwargs 

6804 Keyword arguments to pass on to the interpolating function. 

6805 

6806 Returns 

6807 ------- 

6808 Series or DataFrame 

6809 Returns the same object type as the caller, interpolated at 

6810 some or all ``NaN`` values. 

6811 

6812 See Also 

6813 -------- 

6814 fillna : Fill missing values using different methods. 

6815 scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials 

6816 (Akima interpolator). 

6817 scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the 

6818 Bernstein basis. 

6819 scipy.interpolate.interp1d : Interpolate a 1-D function. 

6820 scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh 

6821 interpolator). 

6822 scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic 

6823 interpolation. 

6824 scipy.interpolate.CubicSpline : Cubic spline data interpolator. 

6825 

6826 Notes 

6827 ----- 

6828 The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' 

6829 methods are wrappers around the respective SciPy implementations of 

6830 similar names. These use the actual numerical values of the index. 

6831 For more information on their behavior, see the 

6832 `SciPy documentation 

6833 <http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__ 

6834 and `SciPy tutorial 

6835 <http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html>`__. 

6836 

6837 Examples 

6838 -------- 

6839 Filling in ``NaN`` in a :class:`~pandas.Series` via linear 

6840 interpolation. 

6841 

6842 >>> s = pd.Series([0, 1, np.nan, 3]) 

6843 >>> s 

6844 0 0.0 

6845 1 1.0 

6846 2 NaN 

6847 3 3.0 

6848 dtype: float64 

6849 >>> s.interpolate() 

6850 0 0.0 

6851 1 1.0 

6852 2 2.0 

6853 3 3.0 

6854 dtype: float64 

6855 

6856 Filling in ``NaN`` in a Series by padding, but filling at most two 

6857 consecutive ``NaN`` at a time. 

6858 

6859 >>> s = pd.Series([np.nan, "single_one", np.nan, 

6860 ... "fill_two_more", np.nan, np.nan, np.nan, 

6861 ... 4.71, np.nan]) 

6862 >>> s 

6863 0 NaN 

6864 1 single_one 

6865 2 NaN 

6866 3 fill_two_more 

6867 4 NaN 

6868 5 NaN 

6869 6 NaN 

6870 7 4.71 

6871 8 NaN 

6872 dtype: object 

6873 >>> s.interpolate(method='pad', limit=2) 

6874 0 NaN 

6875 1 single_one 

6876 2 single_one 

6877 3 fill_two_more 

6878 4 fill_two_more 

6879 5 fill_two_more 

6880 6 NaN 

6881 7 4.71 

6882 8 4.71 

6883 dtype: object 

6884 

6885 Filling in ``NaN`` in a Series via polynomial interpolation or splines: 

6886 Both 'polynomial' and 'spline' methods require that you also specify 

6887 an ``order`` (int). 

6888 

6889 >>> s = pd.Series([0, 2, np.nan, 8]) 

6890 >>> s.interpolate(method='polynomial', order=2) 

6891 0 0.000000 

6892 1 2.000000 

6893 2 4.666667 

6894 3 8.000000 

6895 dtype: float64 

6896 

6897 Fill the DataFrame forward (that is, going down) along each column 

6898 using linear interpolation. 

6899 

6900 Note how the last entry in column 'a' is interpolated differently, 

6901 because there is no entry after it to use for interpolation. 

6902 Note how the first entry in column 'b' remains ``NaN``, because there 

6903 is no entry before it to use for interpolation. 

6904 

6905 >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), 

6906 ... (np.nan, 2.0, np.nan, np.nan), 

6907 ... (2.0, 3.0, np.nan, 9.0), 

6908 ... (np.nan, 4.0, -4.0, 16.0)], 

6909 ... columns=list('abcd')) 

6910 >>> df 

6911 a b c d 

6912 0 0.0 NaN -1.0 1.0 

6913 1 NaN 2.0 NaN NaN 

6914 2 2.0 3.0 NaN 9.0 

6915 3 NaN 4.0 -4.0 16.0 

6916 >>> df.interpolate(method='linear', limit_direction='forward', axis=0) 

6917 a b c d 

6918 0 0.0 NaN -1.0 1.0 

6919 1 1.0 2.0 -2.0 5.0 

6920 2 2.0 3.0 -3.0 9.0 

6921 3 2.0 4.0 -4.0 16.0 

6922 

6923 Using polynomial interpolation. 

6924 

6925 >>> df['d'].interpolate(method='polynomial', order=2) 

6926 0 1.0 

6927 1 4.0 

6928 2 9.0 

6929 3 16.0 

6930 Name: d, dtype: float64 

6931 """ 

6932 

6933 @Appender(_shared_docs["interpolate"] % _shared_doc_kwargs) 

6934 def interpolate( 

6935 self, 

6936 method="linear", 

6937 axis=0, 

6938 limit=None, 

6939 inplace=False, 

6940 limit_direction="forward", 

6941 limit_area=None, 

6942 downcast=None, 

6943 **kwargs, 

6944 ): 

6945 """ 

6946 Interpolate values according to different methods. 

6947 """ 

6948 inplace = validate_bool_kwarg(inplace, "inplace") 

6949 

6950 axis = self._get_axis_number(axis) 

6951 

6952 if axis == 0: 

6953 ax = self._info_axis_name 

6954 _maybe_transposed_self = self 

6955 elif axis == 1: 

6956 _maybe_transposed_self = self.T 

6957 ax = 1 

6958 

6959 ax = _maybe_transposed_self._get_axis_number(ax) 

6960 

6961 if _maybe_transposed_self.ndim == 2: 

6962 alt_ax = 1 - ax 

6963 else: 

6964 alt_ax = ax 

6965 

6966 if isinstance(_maybe_transposed_self.index, MultiIndex) and method != "linear": 

6967 raise ValueError( 

6968 "Only `method=linear` interpolation is supported on MultiIndexes." 

6969 ) 

6970 

6971 if _maybe_transposed_self._data.get_dtype_counts().get("object") == len( 

6972 _maybe_transposed_self.T 

6973 ): 

6974 raise TypeError( 

6975 "Cannot interpolate with all object-dtype columns " 

6976 "in the DataFrame. Try setting at least one " 

6977 "column to a numeric dtype." 

6978 ) 

6979 

6980 # create/use the index 

6981 if method == "linear": 

6982 # prior default 

6983 index = np.arange(len(_maybe_transposed_self._get_axis(alt_ax))) 

6984 else: 

6985 index = _maybe_transposed_self._get_axis(alt_ax) 

6986 methods = {"index", "values", "nearest", "time"} 

6987 is_numeric_or_datetime = ( 

6988 is_numeric_dtype(index) 

6989 or is_datetime64_any_dtype(index) 

6990 or is_timedelta64_dtype(index) 

6991 ) 

6992 if method not in methods and not is_numeric_or_datetime: 

6993 raise ValueError( 

6994 "Index column must be numeric or datetime type when " 

6995 f"using {method} method other than linear. " 

6996 "Try setting a numeric or datetime index column before " 

6997 "interpolating." 

6998 ) 

6999 

7000 if isna(index).any(): 

7001 raise NotImplementedError( 

7002 "Interpolation with NaNs in the index " 

7003 "has not been implemented. Try filling " 

7004 "those NaNs before interpolating." 

7005 ) 

7006 data = _maybe_transposed_self._data 

7007 new_data = data.interpolate( 

7008 method=method, 

7009 axis=ax, 

7010 index=index, 

7011 values=_maybe_transposed_self, 

7012 limit=limit, 

7013 limit_direction=limit_direction, 

7014 limit_area=limit_area, 

7015 inplace=inplace, 

7016 downcast=downcast, 

7017 **kwargs, 

7018 ) 

7019 

7020 if inplace: 

7021 if axis == 1: 

7022 new_data = self._constructor(new_data).T._data 

7023 self._update_inplace(new_data) 

7024 else: 

7025 res = self._constructor(new_data).__finalize__(self) 

7026 if axis == 1: 

7027 res = res.T 

7028 return res 

7029 

7030 # ---------------------------------------------------------------------- 

7031 # Timeseries methods Methods 

7032 

7033 def asof(self, where, subset=None): 

7034 """ 

7035 Return the last row(s) without any NaNs before `where`. 

7036 

7037 The last row (for each element in `where`, if list) without any 

7038 NaN is taken. 

7039 In case of a :class:`~pandas.DataFrame`, the last row without NaN 

7040 considering only the subset of columns (if not `None`) 

7041 

7042 If there is no good value, NaN is returned for a Series or 

7043 a Series of NaN values for a DataFrame 

7044 

7045 Parameters 

7046 ---------- 

7047 where : date or array-like of dates 

7048 Date(s) before which the last row(s) are returned. 

7049 subset : str or array-like of str, default `None` 

7050 For DataFrame, if not `None`, only use these columns to 

7051 check for NaNs. 

7052 

7053 Returns 

7054 ------- 

7055 scalar, Series, or DataFrame 

7056 

7057 The return can be: 

7058 

7059 * scalar : when `self` is a Series and `where` is a scalar 

7060 * Series: when `self` is a Series and `where` is an array-like, 

7061 or when `self` is a DataFrame and `where` is a scalar 

7062 * DataFrame : when `self` is a DataFrame and `where` is an 

7063 array-like 

7064 

7065 Return scalar, Series, or DataFrame. 

7066 

7067 See Also 

7068 -------- 

7069 merge_asof : Perform an asof merge. Similar to left join. 

7070 

7071 Notes 

7072 ----- 

7073 Dates are assumed to be sorted. Raises if this is not the case. 

7074 

7075 Examples 

7076 -------- 

7077 A Series and a scalar `where`. 

7078 

7079 >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40]) 

7080 >>> s 

7081 10 1.0 

7082 20 2.0 

7083 30 NaN 

7084 40 4.0 

7085 dtype: float64 

7086 

7087 >>> s.asof(20) 

7088 2.0 

7089 

7090 For a sequence `where`, a Series is returned. The first value is 

7091 NaN, because the first element of `where` is before the first 

7092 index value. 

7093 

7094 >>> s.asof([5, 20]) 

7095 5 NaN 

7096 20 2.0 

7097 dtype: float64 

7098 

7099 Missing values are not considered. The following is ``2.0``, not 

7100 NaN, even though NaN is at the index location for ``30``. 

7101 

7102 >>> s.asof(30) 

7103 2.0 

7104 

7105 Take all columns into consideration 

7106 

7107 >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50], 

7108 ... 'b': [None, None, None, None, 500]}, 

7109 ... index=pd.DatetimeIndex(['2018-02-27 09:01:00', 

7110 ... '2018-02-27 09:02:00', 

7111 ... '2018-02-27 09:03:00', 

7112 ... '2018-02-27 09:04:00', 

7113 ... '2018-02-27 09:05:00'])) 

7114 >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', 

7115 ... '2018-02-27 09:04:30'])) 

7116 a b 

7117 2018-02-27 09:03:30 NaN NaN 

7118 2018-02-27 09:04:30 NaN NaN 

7119 

7120 Take a single column into consideration 

7121 

7122 >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', 

7123 ... '2018-02-27 09:04:30']), 

7124 ... subset=['a']) 

7125 a b 

7126 2018-02-27 09:03:30 30.0 NaN 

7127 2018-02-27 09:04:30 40.0 NaN 

7128 """ 

7129 if isinstance(where, str): 

7130 where = Timestamp(where) 

7131 

7132 if not self.index.is_monotonic: 

7133 raise ValueError("asof requires a sorted index") 

7134 

7135 is_series = isinstance(self, ABCSeries) 

7136 if is_series: 

7137 if subset is not None: 

7138 raise ValueError("subset is not valid for Series") 

7139 else: 

7140 if subset is None: 

7141 subset = self.columns 

7142 if not is_list_like(subset): 

7143 subset = [subset] 

7144 

7145 is_list = is_list_like(where) 

7146 if not is_list: 

7147 start = self.index[0] 

7148 if isinstance(self.index, PeriodIndex): 

7149 where = Period(where, freq=self.index.freq) 

7150 

7151 if where < start: 

7152 if not is_series: 

7153 from pandas import Series 

7154 

7155 return Series(index=self.columns, name=where, dtype=np.float64) 

7156 return np.nan 

7157 

7158 # It's always much faster to use a *while* loop here for 

7159 # Series than pre-computing all the NAs. However a 

7160 # *while* loop is extremely expensive for DataFrame 

7161 # so we later pre-compute all the NAs and use the same 

7162 # code path whether *where* is a scalar or list. 

7163 # See PR: https://github.com/pandas-dev/pandas/pull/14476 

7164 if is_series: 

7165 loc = self.index.searchsorted(where, side="right") 

7166 if loc > 0: 

7167 loc -= 1 

7168 

7169 values = self._values 

7170 while loc > 0 and isna(values[loc]): 

7171 loc -= 1 

7172 return values[loc] 

7173 

7174 if not isinstance(where, Index): 

7175 where = Index(where) if is_list else Index([where]) 

7176 

7177 nulls = self.isna() if is_series else self[subset].isna().any(1) 

7178 if nulls.all(): 

7179 if is_series: 

7180 return self._constructor(np.nan, index=where, name=self.name) 

7181 elif is_list: 

7182 from pandas import DataFrame 

7183 

7184 return DataFrame(np.nan, index=where, columns=self.columns) 

7185 else: 

7186 from pandas import Series 

7187 

7188 return Series(np.nan, index=self.columns, name=where[0]) 

7189 

7190 locs = self.index.asof_locs(where, ~(nulls.values)) 

7191 

7192 # mask the missing 

7193 missing = locs == -1 

7194 data = self.take(locs) 

7195 data.index = where 

7196 data.loc[missing] = np.nan 

7197 return data if is_list else data.iloc[-1] 

7198 

7199 # ---------------------------------------------------------------------- 

7200 # Action Methods 

7201 

7202 _shared_docs[ 

7203 "isna" 

7204 ] = """ 

7205 Detect missing values. 

7206 

7207 Return a boolean same-sized object indicating if the values are NA. 

7208 NA values, such as None or :attr:`numpy.NaN`, gets mapped to True 

7209 values. 

7210 Everything else gets mapped to False values. Characters such as empty 

7211 strings ``''`` or :attr:`numpy.inf` are not considered NA values 

7212 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

7213 

7214 Returns 

7215 ------- 

7216 %(klass)s 

7217 Mask of bool values for each element in %(klass)s that 

7218 indicates whether an element is not an NA value. 

7219 

7220 See Also 

7221 -------- 

7222 %(klass)s.isnull : Alias of isna. 

7223 %(klass)s.notna : Boolean inverse of isna. 

7224 %(klass)s.dropna : Omit axes labels with missing values. 

7225 isna : Top-level isna. 

7226 

7227 Examples 

7228 -------- 

7229 Show which entries in a DataFrame are NA. 

7230 

7231 >>> df = pd.DataFrame({'age': [5, 6, np.NaN], 

7232 ... 'born': [pd.NaT, pd.Timestamp('1939-05-27'), 

7233 ... pd.Timestamp('1940-04-25')], 

7234 ... 'name': ['Alfred', 'Batman', ''], 

7235 ... 'toy': [None, 'Batmobile', 'Joker']}) 

7236 >>> df 

7237 age born name toy 

7238 0 5.0 NaT Alfred None 

7239 1 6.0 1939-05-27 Batman Batmobile 

7240 2 NaN 1940-04-25 Joker 

7241 

7242 >>> df.isna() 

7243 age born name toy 

7244 0 False True False True 

7245 1 False False False False 

7246 2 True False False False 

7247 

7248 Show which entries in a Series are NA. 

7249 

7250 >>> ser = pd.Series([5, 6, np.NaN]) 

7251 >>> ser 

7252 0 5.0 

7253 1 6.0 

7254 2 NaN 

7255 dtype: float64 

7256 

7257 >>> ser.isna() 

7258 0 False 

7259 1 False 

7260 2 True 

7261 dtype: bool 

7262 """ 

7263 

7264 @Appender(_shared_docs["isna"] % _shared_doc_kwargs) 

7265 def isna(self: FrameOrSeries) -> FrameOrSeries: 

7266 return isna(self).__finalize__(self) 

7267 

7268 @Appender(_shared_docs["isna"] % _shared_doc_kwargs) 

7269 def isnull(self: FrameOrSeries) -> FrameOrSeries: 

7270 return isna(self).__finalize__(self) 

7271 

7272 _shared_docs[ 

7273 "notna" 

7274 ] = """ 

7275 Detect existing (non-missing) values. 

7276 

7277 Return a boolean same-sized object indicating if the values are not NA. 

7278 Non-missing values get mapped to True. Characters such as empty 

7279 strings ``''`` or :attr:`numpy.inf` are not considered NA values 

7280 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

7281 NA values, such as None or :attr:`numpy.NaN`, get mapped to False 

7282 values. 

7283 

7284 Returns 

7285 ------- 

7286 %(klass)s 

7287 Mask of bool values for each element in %(klass)s that 

7288 indicates whether an element is not an NA value. 

7289 

7290 See Also 

7291 -------- 

7292 %(klass)s.notnull : Alias of notna. 

7293 %(klass)s.isna : Boolean inverse of notna. 

7294 %(klass)s.dropna : Omit axes labels with missing values. 

7295 notna : Top-level notna. 

7296 

7297 Examples 

7298 -------- 

7299 Show which entries in a DataFrame are not NA. 

7300 

7301 >>> df = pd.DataFrame({'age': [5, 6, np.NaN], 

7302 ... 'born': [pd.NaT, pd.Timestamp('1939-05-27'), 

7303 ... pd.Timestamp('1940-04-25')], 

7304 ... 'name': ['Alfred', 'Batman', ''], 

7305 ... 'toy': [None, 'Batmobile', 'Joker']}) 

7306 >>> df 

7307 age born name toy 

7308 0 5.0 NaT Alfred None 

7309 1 6.0 1939-05-27 Batman Batmobile 

7310 2 NaN 1940-04-25 Joker 

7311 

7312 >>> df.notna() 

7313 age born name toy 

7314 0 True False True False 

7315 1 True True True True 

7316 2 False True True True 

7317 

7318 Show which entries in a Series are not NA. 

7319 

7320 >>> ser = pd.Series([5, 6, np.NaN]) 

7321 >>> ser 

7322 0 5.0 

7323 1 6.0 

7324 2 NaN 

7325 dtype: float64 

7326 

7327 >>> ser.notna() 

7328 0 True 

7329 1 True 

7330 2 False 

7331 dtype: bool 

7332 """ 

7333 

7334 @Appender(_shared_docs["notna"] % _shared_doc_kwargs) 

7335 def notna(self: FrameOrSeries) -> FrameOrSeries: 

7336 return notna(self).__finalize__(self) 

7337 

7338 @Appender(_shared_docs["notna"] % _shared_doc_kwargs) 

7339 def notnull(self: FrameOrSeries) -> FrameOrSeries: 

7340 return notna(self).__finalize__(self) 

7341 

7342 def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): 

7343 if (lower is not None and np.any(isna(lower))) or ( 

7344 upper is not None and np.any(isna(upper)) 

7345 ): 

7346 raise ValueError("Cannot use an NA value as a clip threshold") 

7347 

7348 result = self 

7349 mask = isna(self.values) 

7350 

7351 with np.errstate(all="ignore"): 

7352 if upper is not None: 

7353 subset = self.to_numpy() <= upper 

7354 result = result.where(subset, upper, axis=None, inplace=False) 

7355 if lower is not None: 

7356 subset = self.to_numpy() >= lower 

7357 result = result.where(subset, lower, axis=None, inplace=False) 

7358 

7359 if np.any(mask): 

7360 result[mask] = np.nan 

7361 

7362 if inplace: 

7363 self._update_inplace(result) 

7364 else: 

7365 return result 

7366 

7367 def _clip_with_one_bound(self, threshold, method, axis, inplace): 

7368 

7369 if axis is not None: 

7370 axis = self._get_axis_number(axis) 

7371 

7372 # method is self.le for upper bound and self.ge for lower bound 

7373 if is_scalar(threshold) and is_number(threshold): 

7374 if method.__name__ == "le": 

7375 return self._clip_with_scalar(None, threshold, inplace=inplace) 

7376 return self._clip_with_scalar(threshold, None, inplace=inplace) 

7377 

7378 subset = method(threshold, axis=axis) | isna(self) 

7379 

7380 # GH #15390 

7381 # In order for where method to work, the threshold must 

7382 # be transformed to NDFrame from other array like structure. 

7383 if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): 

7384 if isinstance(self, ABCSeries): 

7385 threshold = self._constructor(threshold, index=self.index) 

7386 else: 

7387 threshold = _align_method_FRAME(self, threshold, axis) 

7388 return self.where(subset, threshold, axis=axis, inplace=inplace) 

7389 

7390 def clip( 

7391 self: FrameOrSeries, 

7392 lower=None, 

7393 upper=None, 

7394 axis=None, 

7395 inplace: bool_t = False, 

7396 *args, 

7397 **kwargs, 

7398 ) -> FrameOrSeries: 

7399 """ 

7400 Trim values at input threshold(s). 

7401 

7402 Assigns values outside boundary to boundary values. Thresholds 

7403 can be singular values or array like, and in the latter case 

7404 the clipping is performed element-wise in the specified axis. 

7405 

7406 Parameters 

7407 ---------- 

7408 lower : float or array_like, default None 

7409 Minimum threshold value. All values below this 

7410 threshold will be set to it. 

7411 upper : float or array_like, default None 

7412 Maximum threshold value. All values above this 

7413 threshold will be set to it. 

7414 axis : int or str axis name, optional 

7415 Align object with lower and upper along the given axis. 

7416 inplace : bool, default False 

7417 Whether to perform the operation in place on the data. 

7418 

7419 .. versionadded:: 0.21.0 

7420 *args, **kwargs 

7421 Additional keywords have no effect but might be accepted 

7422 for compatibility with numpy. 

7423 

7424 Returns 

7425 ------- 

7426 Series or DataFrame 

7427 Same type as calling object with the values outside the 

7428 clip boundaries replaced. 

7429 

7430 Examples 

7431 -------- 

7432 >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]} 

7433 >>> df = pd.DataFrame(data) 

7434 >>> df 

7435 col_0 col_1 

7436 0 9 -2 

7437 1 -3 -7 

7438 2 0 6 

7439 3 -1 8 

7440 4 5 -5 

7441 

7442 Clips per column using lower and upper thresholds: 

7443 

7444 >>> df.clip(-4, 6) 

7445 col_0 col_1 

7446 0 6 -2 

7447 1 -3 -4 

7448 2 0 6 

7449 3 -1 6 

7450 4 5 -4 

7451 

7452 Clips using specific lower and upper thresholds per column element: 

7453 

7454 >>> t = pd.Series([2, -4, -1, 6, 3]) 

7455 >>> t 

7456 0 2 

7457 1 -4 

7458 2 -1 

7459 3 6 

7460 4 3 

7461 dtype: int64 

7462 

7463 >>> df.clip(t, t + 4, axis=0) 

7464 col_0 col_1 

7465 0 6 2 

7466 1 -3 -4 

7467 2 0 3 

7468 3 6 8 

7469 4 5 3 

7470 """ 

7471 inplace = validate_bool_kwarg(inplace, "inplace") 

7472 

7473 axis = nv.validate_clip_with_axis(axis, args, kwargs) 

7474 if axis is not None: 

7475 axis = self._get_axis_number(axis) 

7476 

7477 # GH 17276 

7478 # numpy doesn't like NaN as a clip value 

7479 # so ignore 

7480 # GH 19992 

7481 # numpy doesn't drop a list-like bound containing NaN 

7482 if not is_list_like(lower) and np.any(isna(lower)): 

7483 lower = None 

7484 if not is_list_like(upper) and np.any(isna(upper)): 

7485 upper = None 

7486 

7487 # GH 2747 (arguments were reversed) 

7488 if lower is not None and upper is not None: 

7489 if is_scalar(lower) and is_scalar(upper): 

7490 lower, upper = min(lower, upper), max(lower, upper) 

7491 

7492 # fast-path for scalars 

7493 if (lower is None or (is_scalar(lower) and is_number(lower))) and ( 

7494 upper is None or (is_scalar(upper) and is_number(upper)) 

7495 ): 

7496 return self._clip_with_scalar(lower, upper, inplace=inplace) 

7497 

7498 result = self 

7499 if lower is not None: 

7500 result = result._clip_with_one_bound( 

7501 lower, method=self.ge, axis=axis, inplace=inplace 

7502 ) 

7503 if upper is not None: 

7504 if inplace: 

7505 result = self 

7506 result = result._clip_with_one_bound( 

7507 upper, method=self.le, axis=axis, inplace=inplace 

7508 ) 

7509 

7510 return result 

7511 

7512 _shared_docs[ 

7513 "groupby" 

7514 ] = """ 

7515 Group %(klass)s using a mapper or by a Series of columns. 

7516 

7517 A groupby operation involves some combination of splitting the 

7518 object, applying a function, and combining the results. This can be 

7519 used to group large amounts of data and compute operations on these 

7520 groups. 

7521 

7522 Parameters 

7523 ---------- 

7524 by : mapping, function, label, or list of labels 

7525 Used to determine the groups for the groupby. 

7526 If ``by`` is a function, it's called on each value of the object's 

7527 index. If a dict or Series is passed, the Series or dict VALUES 

7528 will be used to determine the groups (the Series' values are first 

7529 aligned; see ``.align()`` method). If an ndarray is passed, the 

7530 values are used as-is determine the groups. A label or list of 

7531 labels may be passed to group by the columns in ``self``. Notice 

7532 that a tuple is interpreted as a (single) key. 

7533 axis : {0 or 'index', 1 or 'columns'}, default 0 

7534 Split along rows (0) or columns (1). 

7535 level : int, level name, or sequence of such, default None 

7536 If the axis is a MultiIndex (hierarchical), group by a particular 

7537 level or levels. 

7538 as_index : bool, default True 

7539 For aggregated output, return object with group labels as the 

7540 index. Only relevant for DataFrame input. as_index=False is 

7541 effectively "SQL-style" grouped output. 

7542 sort : bool, default True 

7543 Sort group keys. Get better performance by turning this off. 

7544 Note this does not influence the order of observations within each 

7545 group. Groupby preserves the order of rows within each group. 

7546 group_keys : bool, default True 

7547 When calling apply, add group keys to index to identify pieces. 

7548 squeeze : bool, default False 

7549 Reduce the dimensionality of the return type if possible, 

7550 otherwise return a consistent type. 

7551 observed : bool, default False 

7552 This only applies if any of the groupers are Categoricals. 

7553 If True: only show observed values for categorical groupers. 

7554 If False: show all values for categorical groupers. 

7555 

7556 .. versionadded:: 0.23.0 

7557 

7558 Returns 

7559 ------- 

7560 %(klass)sGroupBy 

7561 Returns a groupby object that contains information about the groups. 

7562 

7563 See Also 

7564 -------- 

7565 resample : Convenience method for frequency conversion and resampling 

7566 of time series. 

7567 

7568 Notes 

7569 ----- 

7570 See the `user guide 

7571 <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more. 

7572 """ 

7573 

7574 def asfreq( 

7575 self: FrameOrSeries, 

7576 freq, 

7577 method=None, 

7578 how: Optional[str] = None, 

7579 normalize: bool_t = False, 

7580 fill_value=None, 

7581 ) -> FrameOrSeries: 

7582 """ 

7583 Convert TimeSeries to specified frequency. 

7584 

7585 Optionally provide filling method to pad/backfill missing values. 

7586 

7587 Returns the original data conformed to a new index with the specified 

7588 frequency. ``resample`` is more appropriate if an operation, such as 

7589 summarization, is necessary to represent the data at the new frequency. 

7590 

7591 Parameters 

7592 ---------- 

7593 freq : DateOffset or str 

7594 method : {'backfill'/'bfill', 'pad'/'ffill'}, default None 

7595 Method to use for filling holes in reindexed Series (note this 

7596 does not fill NaNs that already were present): 

7597 

7598 * 'pad' / 'ffill': propagate last valid observation forward to next 

7599 valid 

7600 * 'backfill' / 'bfill': use NEXT valid observation to fill. 

7601 how : {'start', 'end'}, default end 

7602 For PeriodIndex only (see PeriodIndex.asfreq). 

7603 normalize : bool, default False 

7604 Whether to reset output index to midnight. 

7605 fill_value : scalar, optional 

7606 Value to use for missing values, applied during upsampling (note 

7607 this does not fill NaNs that already were present). 

7608 

7609 Returns 

7610 ------- 

7611 converted : same type as caller 

7612 

7613 See Also 

7614 -------- 

7615 reindex 

7616 

7617 Notes 

7618 ----- 

7619 To learn more about the frequency strings, please see `this link 

7620 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 

7621 

7622 Examples 

7623 -------- 

7624 

7625 Start by creating a series with 4 one minute timestamps. 

7626 

7627 >>> index = pd.date_range('1/1/2000', periods=4, freq='T') 

7628 >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index) 

7629 >>> df = pd.DataFrame({'s':series}) 

7630 >>> df 

7631 s 

7632 2000-01-01 00:00:00 0.0 

7633 2000-01-01 00:01:00 NaN 

7634 2000-01-01 00:02:00 2.0 

7635 2000-01-01 00:03:00 3.0 

7636 

7637 Upsample the series into 30 second bins. 

7638 

7639 >>> df.asfreq(freq='30S') 

7640 s 

7641 2000-01-01 00:00:00 0.0 

7642 2000-01-01 00:00:30 NaN 

7643 2000-01-01 00:01:00 NaN 

7644 2000-01-01 00:01:30 NaN 

7645 2000-01-01 00:02:00 2.0 

7646 2000-01-01 00:02:30 NaN 

7647 2000-01-01 00:03:00 3.0 

7648 

7649 Upsample again, providing a ``fill value``. 

7650 

7651 >>> df.asfreq(freq='30S', fill_value=9.0) 

7652 s 

7653 2000-01-01 00:00:00 0.0 

7654 2000-01-01 00:00:30 9.0 

7655 2000-01-01 00:01:00 NaN 

7656 2000-01-01 00:01:30 9.0 

7657 2000-01-01 00:02:00 2.0 

7658 2000-01-01 00:02:30 9.0 

7659 2000-01-01 00:03:00 3.0 

7660 

7661 Upsample again, providing a ``method``. 

7662 

7663 >>> df.asfreq(freq='30S', method='bfill') 

7664 s 

7665 2000-01-01 00:00:00 0.0 

7666 2000-01-01 00:00:30 NaN 

7667 2000-01-01 00:01:00 NaN 

7668 2000-01-01 00:01:30 2.0 

7669 2000-01-01 00:02:00 2.0 

7670 2000-01-01 00:02:30 3.0 

7671 2000-01-01 00:03:00 3.0 

7672 """ 

7673 from pandas.core.resample import asfreq 

7674 

7675 return asfreq( 

7676 self, 

7677 freq, 

7678 method=method, 

7679 how=how, 

7680 normalize=normalize, 

7681 fill_value=fill_value, 

7682 ) 

7683 

7684 def at_time( 

7685 self: FrameOrSeries, time, asof: bool_t = False, axis=None 

7686 ) -> FrameOrSeries: 

7687 """ 

7688 Select values at particular time of day (e.g. 9:30AM). 

7689 

7690 Parameters 

7691 ---------- 

7692 time : datetime.time or str 

7693 axis : {0 or 'index', 1 or 'columns'}, default 0 

7694 

7695 .. versionadded:: 0.24.0 

7696 

7697 Returns 

7698 ------- 

7699 Series or DataFrame 

7700 

7701 Raises 

7702 ------ 

7703 TypeError 

7704 If the index is not a :class:`DatetimeIndex` 

7705 

7706 See Also 

7707 -------- 

7708 between_time : Select values between particular times of the day. 

7709 first : Select initial periods of time series based on a date offset. 

7710 last : Select final periods of time series based on a date offset. 

7711 DatetimeIndex.indexer_at_time : Get just the index locations for 

7712 values at particular time of the day. 

7713 

7714 Examples 

7715 -------- 

7716 >>> i = pd.date_range('2018-04-09', periods=4, freq='12H') 

7717 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

7718 >>> ts 

7719 A 

7720 2018-04-09 00:00:00 1 

7721 2018-04-09 12:00:00 2 

7722 2018-04-10 00:00:00 3 

7723 2018-04-10 12:00:00 4 

7724 

7725 >>> ts.at_time('12:00') 

7726 A 

7727 2018-04-09 12:00:00 2 

7728 2018-04-10 12:00:00 4 

7729 """ 

7730 if axis is None: 

7731 axis = self._stat_axis_number 

7732 axis = self._get_axis_number(axis) 

7733 

7734 index = self._get_axis(axis) 

7735 try: 

7736 indexer = index.indexer_at_time(time, asof=asof) 

7737 except AttributeError: 

7738 raise TypeError("Index must be DatetimeIndex") 

7739 

7740 return self._take_with_is_copy(indexer, axis=axis) 

7741 

7742 def between_time( 

7743 self: FrameOrSeries, 

7744 start_time, 

7745 end_time, 

7746 include_start: bool_t = True, 

7747 include_end: bool_t = True, 

7748 axis=None, 

7749 ) -> FrameOrSeries: 

7750 """ 

7751 Select values between particular times of the day (e.g., 9:00-9:30 AM). 

7752 

7753 By setting ``start_time`` to be later than ``end_time``, 

7754 you can get the times that are *not* between the two times. 

7755 

7756 Parameters 

7757 ---------- 

7758 start_time : datetime.time or str 

7759 end_time : datetime.time or str 

7760 include_start : bool, default True 

7761 include_end : bool, default True 

7762 axis : {0 or 'index', 1 or 'columns'}, default 0 

7763 

7764 .. versionadded:: 0.24.0 

7765 

7766 Returns 

7767 ------- 

7768 Series or DataFrame 

7769 

7770 Raises 

7771 ------ 

7772 TypeError 

7773 If the index is not a :class:`DatetimeIndex` 

7774 

7775 See Also 

7776 -------- 

7777 at_time : Select values at a particular time of the day. 

7778 first : Select initial periods of time series based on a date offset. 

7779 last : Select final periods of time series based on a date offset. 

7780 DatetimeIndex.indexer_between_time : Get just the index locations for 

7781 values between particular times of the day. 

7782 

7783 Examples 

7784 -------- 

7785 >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min') 

7786 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

7787 >>> ts 

7788 A 

7789 2018-04-09 00:00:00 1 

7790 2018-04-10 00:20:00 2 

7791 2018-04-11 00:40:00 3 

7792 2018-04-12 01:00:00 4 

7793 

7794 >>> ts.between_time('0:15', '0:45') 

7795 A 

7796 2018-04-10 00:20:00 2 

7797 2018-04-11 00:40:00 3 

7798 

7799 You get the times that are *not* between two times by setting 

7800 ``start_time`` later than ``end_time``: 

7801 

7802 >>> ts.between_time('0:45', '0:15') 

7803 A 

7804 2018-04-09 00:00:00 1 

7805 2018-04-12 01:00:00 4 

7806 """ 

7807 if axis is None: 

7808 axis = self._stat_axis_number 

7809 axis = self._get_axis_number(axis) 

7810 

7811 index = self._get_axis(axis) 

7812 try: 

7813 indexer = index.indexer_between_time( 

7814 start_time, 

7815 end_time, 

7816 include_start=include_start, 

7817 include_end=include_end, 

7818 ) 

7819 except AttributeError: 

7820 raise TypeError("Index must be DatetimeIndex") 

7821 

7822 return self._take_with_is_copy(indexer, axis=axis) 

7823 

7824 def resample( 

7825 self, 

7826 rule, 

7827 axis=0, 

7828 closed: Optional[str] = None, 

7829 label: Optional[str] = None, 

7830 convention: str = "start", 

7831 kind: Optional[str] = None, 

7832 loffset=None, 

7833 base: int = 0, 

7834 on=None, 

7835 level=None, 

7836 ): 

7837 """ 

7838 Resample time-series data. 

7839 

7840 Convenience method for frequency conversion and resampling of time 

7841 series. Object must have a datetime-like index (`DatetimeIndex`, 

7842 `PeriodIndex`, or `TimedeltaIndex`), or pass datetime-like values 

7843 to the `on` or `level` keyword. 

7844 

7845 Parameters 

7846 ---------- 

7847 rule : DateOffset, Timedelta or str 

7848 The offset string or object representing target conversion. 

7849 axis : {0 or 'index', 1 or 'columns'}, default 0 

7850 Which axis to use for up- or down-sampling. For `Series` this 

7851 will default to 0, i.e. along the rows. Must be 

7852 `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`. 

7853 closed : {'right', 'left'}, default None 

7854 Which side of bin interval is closed. The default is 'left' 

7855 for all frequency offsets except for 'M', 'A', 'Q', 'BM', 

7856 'BA', 'BQ', and 'W' which all have a default of 'right'. 

7857 label : {'right', 'left'}, default None 

7858 Which bin edge label to label bucket with. The default is 'left' 

7859 for all frequency offsets except for 'M', 'A', 'Q', 'BM', 

7860 'BA', 'BQ', and 'W' which all have a default of 'right'. 

7861 convention : {'start', 'end', 's', 'e'}, default 'start' 

7862 For `PeriodIndex` only, controls whether to use the start or 

7863 end of `rule`. 

7864 kind : {'timestamp', 'period'}, optional, default None 

7865 Pass 'timestamp' to convert the resulting index to a 

7866 `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`. 

7867 By default the input representation is retained. 

7868 loffset : timedelta, default None 

7869 Adjust the resampled time labels. 

7870 base : int, default 0 

7871 For frequencies that evenly subdivide 1 day, the "origin" of the 

7872 aggregated intervals. For example, for '5min' frequency, base could 

7873 range from 0 through 4. Defaults to 0. 

7874 on : str, optional 

7875 For a DataFrame, column to use instead of index for resampling. 

7876 Column must be datetime-like. 

7877 

7878 level : str or int, optional 

7879 For a MultiIndex, level (name or number) to use for 

7880 resampling. `level` must be datetime-like. 

7881 

7882 Returns 

7883 ------- 

7884 Resampler object 

7885 

7886 See Also 

7887 -------- 

7888 groupby : Group by mapping, function, label, or list of labels. 

7889 Series.resample : Resample a Series. 

7890 DataFrame.resample: Resample a DataFrame. 

7891 

7892 Notes 

7893 ----- 

7894 See the `user guide 

7895 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling>`_ 

7896 for more. 

7897 

7898 To learn more about the offset strings, please see `this link 

7899 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`__. 

7900 

7901 Examples 

7902 -------- 

7903 

7904 Start by creating a series with 9 one minute timestamps. 

7905 

7906 >>> index = pd.date_range('1/1/2000', periods=9, freq='T') 

7907 >>> series = pd.Series(range(9), index=index) 

7908 >>> series 

7909 2000-01-01 00:00:00 0 

7910 2000-01-01 00:01:00 1 

7911 2000-01-01 00:02:00 2 

7912 2000-01-01 00:03:00 3 

7913 2000-01-01 00:04:00 4 

7914 2000-01-01 00:05:00 5 

7915 2000-01-01 00:06:00 6 

7916 2000-01-01 00:07:00 7 

7917 2000-01-01 00:08:00 8 

7918 Freq: T, dtype: int64 

7919 

7920 Downsample the series into 3 minute bins and sum the values 

7921 of the timestamps falling into a bin. 

7922 

7923 >>> series.resample('3T').sum() 

7924 2000-01-01 00:00:00 3 

7925 2000-01-01 00:03:00 12 

7926 2000-01-01 00:06:00 21 

7927 Freq: 3T, dtype: int64 

7928 

7929 Downsample the series into 3 minute bins as above, but label each 

7930 bin using the right edge instead of the left. Please note that the 

7931 value in the bucket used as the label is not included in the bucket, 

7932 which it labels. For example, in the original series the 

7933 bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed 

7934 value in the resampled bucket with the label ``2000-01-01 00:03:00`` 

7935 does not include 3 (if it did, the summed value would be 6, not 3). 

7936 To include this value close the right side of the bin interval as 

7937 illustrated in the example below this one. 

7938 

7939 >>> series.resample('3T', label='right').sum() 

7940 2000-01-01 00:03:00 3 

7941 2000-01-01 00:06:00 12 

7942 2000-01-01 00:09:00 21 

7943 Freq: 3T, dtype: int64 

7944 

7945 Downsample the series into 3 minute bins as above, but close the right 

7946 side of the bin interval. 

7947 

7948 >>> series.resample('3T', label='right', closed='right').sum() 

7949 2000-01-01 00:00:00 0 

7950 2000-01-01 00:03:00 6 

7951 2000-01-01 00:06:00 15 

7952 2000-01-01 00:09:00 15 

7953 Freq: 3T, dtype: int64 

7954 

7955 Upsample the series into 30 second bins. 

7956 

7957 >>> series.resample('30S').asfreq()[0:5] # Select first 5 rows 

7958 2000-01-01 00:00:00 0.0 

7959 2000-01-01 00:00:30 NaN 

7960 2000-01-01 00:01:00 1.0 

7961 2000-01-01 00:01:30 NaN 

7962 2000-01-01 00:02:00 2.0 

7963 Freq: 30S, dtype: float64 

7964 

7965 Upsample the series into 30 second bins and fill the ``NaN`` 

7966 values using the ``pad`` method. 

7967 

7968 >>> series.resample('30S').pad()[0:5] 

7969 2000-01-01 00:00:00 0 

7970 2000-01-01 00:00:30 0 

7971 2000-01-01 00:01:00 1 

7972 2000-01-01 00:01:30 1 

7973 2000-01-01 00:02:00 2 

7974 Freq: 30S, dtype: int64 

7975 

7976 Upsample the series into 30 second bins and fill the 

7977 ``NaN`` values using the ``bfill`` method. 

7978 

7979 >>> series.resample('30S').bfill()[0:5] 

7980 2000-01-01 00:00:00 0 

7981 2000-01-01 00:00:30 1 

7982 2000-01-01 00:01:00 1 

7983 2000-01-01 00:01:30 2 

7984 2000-01-01 00:02:00 2 

7985 Freq: 30S, dtype: int64 

7986 

7987 Pass a custom function via ``apply`` 

7988 

7989 >>> def custom_resampler(array_like): 

7990 ... return np.sum(array_like) + 5 

7991 ... 

7992 >>> series.resample('3T').apply(custom_resampler) 

7993 2000-01-01 00:00:00 8 

7994 2000-01-01 00:03:00 17 

7995 2000-01-01 00:06:00 26 

7996 Freq: 3T, dtype: int64 

7997 

7998 For a Series with a PeriodIndex, the keyword `convention` can be 

7999 used to control whether to use the start or end of `rule`. 

8000 

8001 Resample a year by quarter using 'start' `convention`. Values are 

8002 assigned to the first quarter of the period. 

8003 

8004 >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01', 

8005 ... freq='A', 

8006 ... periods=2)) 

8007 >>> s 

8008 2012 1 

8009 2013 2 

8010 Freq: A-DEC, dtype: int64 

8011 >>> s.resample('Q', convention='start').asfreq() 

8012 2012Q1 1.0 

8013 2012Q2 NaN 

8014 2012Q3 NaN 

8015 2012Q4 NaN 

8016 2013Q1 2.0 

8017 2013Q2 NaN 

8018 2013Q3 NaN 

8019 2013Q4 NaN 

8020 Freq: Q-DEC, dtype: float64 

8021 

8022 Resample quarters by month using 'end' `convention`. Values are 

8023 assigned to the last month of the period. 

8024 

8025 >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01', 

8026 ... freq='Q', 

8027 ... periods=4)) 

8028 >>> q 

8029 2018Q1 1 

8030 2018Q2 2 

8031 2018Q3 3 

8032 2018Q4 4 

8033 Freq: Q-DEC, dtype: int64 

8034 >>> q.resample('M', convention='end').asfreq() 

8035 2018-03 1.0 

8036 2018-04 NaN 

8037 2018-05 NaN 

8038 2018-06 2.0 

8039 2018-07 NaN 

8040 2018-08 NaN 

8041 2018-09 3.0 

8042 2018-10 NaN 

8043 2018-11 NaN 

8044 2018-12 4.0 

8045 Freq: M, dtype: float64 

8046 

8047 For DataFrame objects, the keyword `on` can be used to specify the 

8048 column instead of the index for resampling. 

8049 

8050 >>> d = dict({'price': [10, 11, 9, 13, 14, 18, 17, 19], 

8051 ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}) 

8052 >>> df = pd.DataFrame(d) 

8053 >>> df['week_starting'] = pd.date_range('01/01/2018', 

8054 ... periods=8, 

8055 ... freq='W') 

8056 >>> df 

8057 price volume week_starting 

8058 0 10 50 2018-01-07 

8059 1 11 60 2018-01-14 

8060 2 9 40 2018-01-21 

8061 3 13 100 2018-01-28 

8062 4 14 50 2018-02-04 

8063 5 18 100 2018-02-11 

8064 6 17 40 2018-02-18 

8065 7 19 50 2018-02-25 

8066 >>> df.resample('M', on='week_starting').mean() 

8067 price volume 

8068 week_starting 

8069 2018-01-31 10.75 62.5 

8070 2018-02-28 17.00 60.0 

8071 

8072 For a DataFrame with MultiIndex, the keyword `level` can be used to 

8073 specify on which level the resampling needs to take place. 

8074 

8075 >>> days = pd.date_range('1/1/2000', periods=4, freq='D') 

8076 >>> d2 = dict({'price': [10, 11, 9, 13, 14, 18, 17, 19], 

8077 ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}) 

8078 >>> df2 = pd.DataFrame(d2, 

8079 ... index=pd.MultiIndex.from_product([days, 

8080 ... ['morning', 

8081 ... 'afternoon']] 

8082 ... )) 

8083 >>> df2 

8084 price volume 

8085 2000-01-01 morning 10 50 

8086 afternoon 11 60 

8087 2000-01-02 morning 9 40 

8088 afternoon 13 100 

8089 2000-01-03 morning 14 50 

8090 afternoon 18 100 

8091 2000-01-04 morning 17 40 

8092 afternoon 19 50 

8093 >>> df2.resample('D', level=0).sum() 

8094 price volume 

8095 2000-01-01 21 110 

8096 2000-01-02 22 140 

8097 2000-01-03 32 150 

8098 2000-01-04 36 90 

8099 """ 

8100 

8101 from pandas.core.resample import resample 

8102 

8103 axis = self._get_axis_number(axis) 

8104 return resample( 

8105 self, 

8106 freq=rule, 

8107 label=label, 

8108 closed=closed, 

8109 axis=axis, 

8110 kind=kind, 

8111 loffset=loffset, 

8112 convention=convention, 

8113 base=base, 

8114 key=on, 

8115 level=level, 

8116 ) 

8117 

8118 def first(self: FrameOrSeries, offset) -> FrameOrSeries: 

8119 """ 

8120 Method to subset initial periods of time series data based on a date offset. 

8121 

8122 Parameters 

8123 ---------- 

8124 offset : str, DateOffset, dateutil.relativedelta 

8125 

8126 Returns 

8127 ------- 

8128 subset : same type as caller 

8129 

8130 Raises 

8131 ------ 

8132 TypeError 

8133 If the index is not a :class:`DatetimeIndex` 

8134 

8135 See Also 

8136 -------- 

8137 last : Select final periods of time series based on a date offset. 

8138 at_time : Select values at a particular time of the day. 

8139 between_time : Select values between particular times of the day. 

8140 

8141 Examples 

8142 -------- 

8143 >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') 

8144 >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) 

8145 >>> ts 

8146 A 

8147 2018-04-09 1 

8148 2018-04-11 2 

8149 2018-04-13 3 

8150 2018-04-15 4 

8151 

8152 Get the rows for the first 3 days: 

8153 

8154 >>> ts.first('3D') 

8155 A 

8156 2018-04-09 1 

8157 2018-04-11 2 

8158 

8159 Notice the data for 3 first calender days were returned, not the first 

8160 3 days observed in the dataset, and therefore data for 2018-04-13 was 

8161 not returned. 

8162 """ 

8163 if not isinstance(self.index, DatetimeIndex): 

8164 raise TypeError("'first' only supports a DatetimeIndex index") 

8165 

8166 if len(self.index) == 0: 

8167 return self 

8168 

8169 offset = to_offset(offset) 

8170 end_date = end = self.index[0] + offset 

8171 

8172 # Tick-like, e.g. 3 weeks 

8173 if not offset.is_anchored() and hasattr(offset, "_inc"): 

8174 if end_date in self.index: 

8175 end = self.index.searchsorted(end_date, side="left") 

8176 return self.iloc[:end] 

8177 

8178 return self.loc[:end] 

8179 

8180 def last(self: FrameOrSeries, offset) -> FrameOrSeries: 

8181 """ 

8182 Method to subset final periods of time series data based on a date offset. 

8183 

8184 Parameters 

8185 ---------- 

8186 offset : str, DateOffset, dateutil.relativedelta 

8187 

8188 Returns 

8189 ------- 

8190 subset : same type as caller 

8191 

8192 Raises 

8193 ------ 

8194 TypeError 

8195 If the index is not a :class:`DatetimeIndex` 

8196 

8197 See Also 

8198 -------- 

8199 first : Select initial periods of time series based on a date offset. 

8200 at_time : Select values at a particular time of the day. 

8201 between_time : Select values between particular times of the day. 

8202 

8203 Examples 

8204 -------- 

8205 >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') 

8206 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

8207 >>> ts 

8208 A 

8209 2018-04-09 1 

8210 2018-04-11 2 

8211 2018-04-13 3 

8212 2018-04-15 4 

8213 

8214 Get the rows for the last 3 days: 

8215 

8216 >>> ts.last('3D') 

8217 A 

8218 2018-04-13 3 

8219 2018-04-15 4 

8220 

8221 Notice the data for 3 last calender days were returned, not the last 

8222 3 observed days in the dataset, and therefore data for 2018-04-11 was 

8223 not returned. 

8224 """ 

8225 if not isinstance(self.index, DatetimeIndex): 

8226 raise TypeError("'last' only supports a DatetimeIndex index") 

8227 

8228 if len(self.index) == 0: 

8229 return self 

8230 

8231 offset = to_offset(offset) 

8232 

8233 start_date = self.index[-1] - offset 

8234 start = self.index.searchsorted(start_date, side="right") 

8235 return self.iloc[start:] 

8236 

8237 def rank( 

8238 self: FrameOrSeries, 

8239 axis=0, 

8240 method: str = "average", 

8241 numeric_only: Optional[bool_t] = None, 

8242 na_option: str = "keep", 

8243 ascending: bool_t = True, 

8244 pct: bool_t = False, 

8245 ) -> FrameOrSeries: 

8246 """ 

8247 Compute numerical data ranks (1 through n) along axis. 

8248 

8249 By default, equal values are assigned a rank that is the average of the 

8250 ranks of those values. 

8251 

8252 Parameters 

8253 ---------- 

8254 axis : {0 or 'index', 1 or 'columns'}, default 0 

8255 Index to direct ranking. 

8256 method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' 

8257 How to rank the group of records that have the same value (i.e. ties): 

8258 

8259 * average: average rank of the group 

8260 * min: lowest rank in the group 

8261 * max: highest rank in the group 

8262 * first: ranks assigned in order they appear in the array 

8263 * dense: like 'min', but rank always increases by 1 between groups. 

8264 

8265 numeric_only : bool, optional 

8266 For DataFrame objects, rank only numeric columns if set to True. 

8267 na_option : {'keep', 'top', 'bottom'}, default 'keep' 

8268 How to rank NaN values: 

8269 

8270 * keep: assign NaN rank to NaN values 

8271 * top: assign smallest rank to NaN values if ascending 

8272 * bottom: assign highest rank to NaN values if ascending. 

8273 

8274 ascending : bool, default True 

8275 Whether or not the elements should be ranked in ascending order. 

8276 pct : bool, default False 

8277 Whether or not to display the returned rankings in percentile 

8278 form. 

8279 

8280 Returns 

8281 ------- 

8282 same type as caller 

8283 Return a Series or DataFrame with data ranks as values. 

8284 

8285 See Also 

8286 -------- 

8287 core.groupby.GroupBy.rank : Rank of values within each group. 

8288 

8289 Examples 

8290 -------- 

8291 

8292 >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog', 

8293 ... 'spider', 'snake'], 

8294 ... 'Number_legs': [4, 2, 4, 8, np.nan]}) 

8295 >>> df 

8296 Animal Number_legs 

8297 0 cat 4.0 

8298 1 penguin 2.0 

8299 2 dog 4.0 

8300 3 spider 8.0 

8301 4 snake NaN 

8302 

8303 The following example shows how the method behaves with the above 

8304 parameters: 

8305 

8306 * default_rank: this is the default behaviour obtained without using 

8307 any parameter. 

8308 * max_rank: setting ``method = 'max'`` the records that have the 

8309 same values are ranked using the highest rank (e.g.: since 'cat' 

8310 and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.) 

8311 * NA_bottom: choosing ``na_option = 'bottom'``, if there are records 

8312 with NaN values they are placed at the bottom of the ranking. 

8313 * pct_rank: when setting ``pct = True``, the ranking is expressed as 

8314 percentile rank. 

8315 

8316 >>> df['default_rank'] = df['Number_legs'].rank() 

8317 >>> df['max_rank'] = df['Number_legs'].rank(method='max') 

8318 >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom') 

8319 >>> df['pct_rank'] = df['Number_legs'].rank(pct=True) 

8320 >>> df 

8321 Animal Number_legs default_rank max_rank NA_bottom pct_rank 

8322 0 cat 4.0 2.5 3.0 2.5 0.625 

8323 1 penguin 2.0 1.0 1.0 1.0 0.250 

8324 2 dog 4.0 2.5 3.0 2.5 0.625 

8325 3 spider 8.0 4.0 4.0 4.0 1.000 

8326 4 snake NaN NaN NaN 5.0 NaN 

8327 """ 

8328 axis = self._get_axis_number(axis) 

8329 

8330 if na_option not in {"keep", "top", "bottom"}: 

8331 msg = "na_option must be one of 'keep', 'top', or 'bottom'" 

8332 raise ValueError(msg) 

8333 

8334 def ranker(data): 

8335 ranks = algos.rank( 

8336 data.values, 

8337 axis=axis, 

8338 method=method, 

8339 ascending=ascending, 

8340 na_option=na_option, 

8341 pct=pct, 

8342 ) 

8343 ranks = self._constructor(ranks, **data._construct_axes_dict()) 

8344 return ranks.__finalize__(self) 

8345 

8346 # if numeric_only is None, and we can't get anything, we try with 

8347 # numeric_only=True 

8348 if numeric_only is None: 

8349 try: 

8350 return ranker(self) 

8351 except TypeError: 

8352 numeric_only = True 

8353 

8354 if numeric_only: 

8355 data = self._get_numeric_data() 

8356 else: 

8357 data = self 

8358 

8359 return ranker(data) 

8360 

8361 _shared_docs[ 

8362 "align" 

8363 ] = """ 

8364 Align two objects on their axes with the specified join method. 

8365 

8366 Join method is specified for each axis Index. 

8367 

8368 Parameters 

8369 ---------- 

8370 other : DataFrame or Series 

8371 join : {'outer', 'inner', 'left', 'right'}, default 'outer' 

8372 axis : allowed axis of the other object, default None 

8373 Align on index (0), columns (1), or both (None). 

8374 level : int or level name, default None 

8375 Broadcast across a level, matching Index values on the 

8376 passed MultiIndex level. 

8377 copy : bool, default True 

8378 Always returns new objects. If copy=False and no reindexing is 

8379 required then original objects are returned. 

8380 fill_value : scalar, default np.NaN 

8381 Value to use for missing values. Defaults to NaN, but can be any 

8382 "compatible" value. 

8383 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None 

8384 Method to use for filling holes in reindexed Series: 

8385 

8386 - pad / ffill: propagate last valid observation forward to next valid. 

8387 - backfill / bfill: use NEXT valid observation to fill gap. 

8388 

8389 limit : int, default None 

8390 If method is specified, this is the maximum number of consecutive 

8391 NaN values to forward/backward fill. In other words, if there is 

8392 a gap with more than this number of consecutive NaNs, it will only 

8393 be partially filled. If method is not specified, this is the 

8394 maximum number of entries along the entire axis where NaNs will be 

8395 filled. Must be greater than 0 if not None. 

8396 fill_axis : %(axes_single_arg)s, default 0 

8397 Filling axis, method and limit. 

8398 broadcast_axis : %(axes_single_arg)s, default None 

8399 Broadcast values along this axis, if aligning two objects of 

8400 different dimensions. 

8401 

8402 Returns 

8403 ------- 

8404 (left, right) : (%(klass)s, type of other) 

8405 Aligned objects. 

8406 """ 

8407 

8408 @Appender(_shared_docs["align"] % _shared_doc_kwargs) 

8409 def align( 

8410 self, 

8411 other, 

8412 join="outer", 

8413 axis=None, 

8414 level=None, 

8415 copy=True, 

8416 fill_value=None, 

8417 method=None, 

8418 limit=None, 

8419 fill_axis=0, 

8420 broadcast_axis=None, 

8421 ): 

8422 method = missing.clean_fill_method(method) 

8423 

8424 if broadcast_axis == 1 and self.ndim != other.ndim: 

8425 if isinstance(self, ABCSeries): 

8426 # this means other is a DataFrame, and we need to broadcast 

8427 # self 

8428 cons = self._constructor_expanddim 

8429 df = cons( 

8430 {c: self for c in other.columns}, **other._construct_axes_dict() 

8431 ) 

8432 return df._align_frame( 

8433 other, 

8434 join=join, 

8435 axis=axis, 

8436 level=level, 

8437 copy=copy, 

8438 fill_value=fill_value, 

8439 method=method, 

8440 limit=limit, 

8441 fill_axis=fill_axis, 

8442 ) 

8443 elif isinstance(other, ABCSeries): 

8444 # this means self is a DataFrame, and we need to broadcast 

8445 # other 

8446 cons = other._constructor_expanddim 

8447 df = cons( 

8448 {c: other for c in self.columns}, **self._construct_axes_dict() 

8449 ) 

8450 return self._align_frame( 

8451 df, 

8452 join=join, 

8453 axis=axis, 

8454 level=level, 

8455 copy=copy, 

8456 fill_value=fill_value, 

8457 method=method, 

8458 limit=limit, 

8459 fill_axis=fill_axis, 

8460 ) 

8461 

8462 if axis is not None: 

8463 axis = self._get_axis_number(axis) 

8464 if isinstance(other, ABCDataFrame): 

8465 return self._align_frame( 

8466 other, 

8467 join=join, 

8468 axis=axis, 

8469 level=level, 

8470 copy=copy, 

8471 fill_value=fill_value, 

8472 method=method, 

8473 limit=limit, 

8474 fill_axis=fill_axis, 

8475 ) 

8476 elif isinstance(other, ABCSeries): 

8477 return self._align_series( 

8478 other, 

8479 join=join, 

8480 axis=axis, 

8481 level=level, 

8482 copy=copy, 

8483 fill_value=fill_value, 

8484 method=method, 

8485 limit=limit, 

8486 fill_axis=fill_axis, 

8487 ) 

8488 else: # pragma: no cover 

8489 raise TypeError(f"unsupported type: {type(other)}") 

8490 

8491 def _align_frame( 

8492 self, 

8493 other, 

8494 join="outer", 

8495 axis=None, 

8496 level=None, 

8497 copy: bool_t = True, 

8498 fill_value=None, 

8499 method=None, 

8500 limit=None, 

8501 fill_axis=0, 

8502 ): 

8503 # defaults 

8504 join_index, join_columns = None, None 

8505 ilidx, iridx = None, None 

8506 clidx, cridx = None, None 

8507 

8508 is_series = isinstance(self, ABCSeries) 

8509 

8510 if axis is None or axis == 0: 

8511 if not self.index.equals(other.index): 

8512 join_index, ilidx, iridx = self.index.join( 

8513 other.index, how=join, level=level, return_indexers=True 

8514 ) 

8515 

8516 if axis is None or axis == 1: 

8517 if not is_series and not self.columns.equals(other.columns): 

8518 join_columns, clidx, cridx = self.columns.join( 

8519 other.columns, how=join, level=level, return_indexers=True 

8520 ) 

8521 

8522 if is_series: 

8523 reindexers = {0: [join_index, ilidx]} 

8524 else: 

8525 reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]} 

8526 

8527 left = self._reindex_with_indexers( 

8528 reindexers, copy=copy, fill_value=fill_value, allow_dups=True 

8529 ) 

8530 # other must be always DataFrame 

8531 right = other._reindex_with_indexers( 

8532 {0: [join_index, iridx], 1: [join_columns, cridx]}, 

8533 copy=copy, 

8534 fill_value=fill_value, 

8535 allow_dups=True, 

8536 ) 

8537 

8538 if method is not None: 

8539 _left = left.fillna(method=method, axis=fill_axis, limit=limit) 

8540 assert _left is not None # needed for mypy 

8541 left = _left 

8542 right = right.fillna(method=method, axis=fill_axis, limit=limit) 

8543 

8544 # if DatetimeIndex have different tz, convert to UTC 

8545 if is_datetime64tz_dtype(left.index): 

8546 if left.index.tz != right.index.tz: 

8547 if join_index is not None: 

8548 left.index = join_index 

8549 right.index = join_index 

8550 

8551 return left.__finalize__(self), right.__finalize__(other) 

8552 

8553 def _align_series( 

8554 self, 

8555 other, 

8556 join="outer", 

8557 axis=None, 

8558 level=None, 

8559 copy: bool_t = True, 

8560 fill_value=None, 

8561 method=None, 

8562 limit=None, 

8563 fill_axis=0, 

8564 ): 

8565 

8566 is_series = isinstance(self, ABCSeries) 

8567 

8568 # series/series compat, other must always be a Series 

8569 if is_series: 

8570 if axis: 

8571 raise ValueError("cannot align series to a series other than axis 0") 

8572 

8573 # equal 

8574 if self.index.equals(other.index): 

8575 join_index, lidx, ridx = None, None, None 

8576 else: 

8577 join_index, lidx, ridx = self.index.join( 

8578 other.index, how=join, level=level, return_indexers=True 

8579 ) 

8580 

8581 left = self._reindex_indexer(join_index, lidx, copy) 

8582 right = other._reindex_indexer(join_index, ridx, copy) 

8583 

8584 else: 

8585 # one has > 1 ndim 

8586 fdata = self._data 

8587 if axis == 0: 

8588 join_index = self.index 

8589 lidx, ridx = None, None 

8590 if not self.index.equals(other.index): 

8591 join_index, lidx, ridx = self.index.join( 

8592 other.index, how=join, level=level, return_indexers=True 

8593 ) 

8594 

8595 if lidx is not None: 

8596 fdata = fdata.reindex_indexer(join_index, lidx, axis=1) 

8597 

8598 elif axis == 1: 

8599 join_index = self.columns 

8600 lidx, ridx = None, None 

8601 if not self.columns.equals(other.index): 

8602 join_index, lidx, ridx = self.columns.join( 

8603 other.index, how=join, level=level, return_indexers=True 

8604 ) 

8605 

8606 if lidx is not None: 

8607 fdata = fdata.reindex_indexer(join_index, lidx, axis=0) 

8608 else: 

8609 raise ValueError("Must specify axis=0 or 1") 

8610 

8611 if copy and fdata is self._data: 

8612 fdata = fdata.copy() 

8613 

8614 left = self._constructor(fdata) 

8615 

8616 if ridx is None: 

8617 right = other 

8618 else: 

8619 right = other.reindex(join_index, level=level) 

8620 

8621 # fill 

8622 fill_na = notna(fill_value) or (method is not None) 

8623 if fill_na: 

8624 left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis) 

8625 right = right.fillna(fill_value, method=method, limit=limit) 

8626 

8627 # if DatetimeIndex have different tz, convert to UTC 

8628 if is_series or (not is_series and axis == 0): 

8629 if is_datetime64tz_dtype(left.index): 

8630 if left.index.tz != right.index.tz: 

8631 if join_index is not None: 

8632 left.index = join_index 

8633 right.index = join_index 

8634 

8635 return left.__finalize__(self), right.__finalize__(other) 

8636 

8637 def _where( 

8638 self, 

8639 cond, 

8640 other=np.nan, 

8641 inplace=False, 

8642 axis=None, 

8643 level=None, 

8644 errors="raise", 

8645 try_cast=False, 

8646 ): 

8647 """ 

8648 Equivalent to public method `where`, except that `other` is not 

8649 applied as a function even if callable. Used in __setitem__. 

8650 """ 

8651 inplace = validate_bool_kwarg(inplace, "inplace") 

8652 

8653 # align the cond to same shape as myself 

8654 cond = com.apply_if_callable(cond, self) 

8655 if isinstance(cond, NDFrame): 

8656 cond, _ = cond.align(self, join="right", broadcast_axis=1) 

8657 else: 

8658 if not hasattr(cond, "shape"): 

8659 cond = np.asanyarray(cond) 

8660 if cond.shape != self.shape: 

8661 raise ValueError("Array conditional must be same shape as self") 

8662 cond = self._constructor(cond, **self._construct_axes_dict()) 

8663 

8664 # make sure we are boolean 

8665 fill_value = bool(inplace) 

8666 cond = cond.fillna(fill_value) 

8667 

8668 msg = "Boolean array expected for the condition, not {dtype}" 

8669 

8670 if not isinstance(cond, ABCDataFrame): 

8671 # This is a single-dimensional object. 

8672 if not is_bool_dtype(cond): 

8673 raise ValueError(msg.format(dtype=cond.dtype)) 

8674 elif not cond.empty: 

8675 for dt in cond.dtypes: 

8676 if not is_bool_dtype(dt): 

8677 raise ValueError(msg.format(dtype=dt)) 

8678 

8679 cond = -cond if inplace else cond 

8680 

8681 # try to align with other 

8682 try_quick = True 

8683 if hasattr(other, "align"): 

8684 

8685 # align with me 

8686 if other.ndim <= self.ndim: 

8687 

8688 _, other = self.align( 

8689 other, join="left", axis=axis, level=level, fill_value=np.nan 

8690 ) 

8691 

8692 # if we are NOT aligned, raise as we cannot where index 

8693 if axis is None and not all( 

8694 other._get_axis(i).equals(ax) for i, ax in enumerate(self.axes) 

8695 ): 

8696 raise InvalidIndexError 

8697 

8698 # slice me out of the other 

8699 else: 

8700 raise NotImplementedError( 

8701 "cannot align with a higher dimensional NDFrame" 

8702 ) 

8703 

8704 if isinstance(other, np.ndarray): 

8705 

8706 if other.shape != self.shape: 

8707 

8708 if self.ndim == 1: 

8709 

8710 icond = cond.values 

8711 

8712 # GH 2745 / GH 4192 

8713 # treat like a scalar 

8714 if len(other) == 1: 

8715 other = np.array(other[0]) 

8716 

8717 # GH 3235 

8718 # match True cond to other 

8719 elif len(cond[icond]) == len(other): 

8720 

8721 # try to not change dtype at first (if try_quick) 

8722 if try_quick: 

8723 new_other = com.values_from_object(self) 

8724 new_other = new_other.copy() 

8725 new_other[icond] = other 

8726 other = new_other 

8727 

8728 else: 

8729 raise ValueError( 

8730 "Length of replacements must equal series length" 

8731 ) 

8732 

8733 else: 

8734 raise ValueError( 

8735 "other must be the same shape as self when an ndarray" 

8736 ) 

8737 

8738 # we are the same shape, so create an actual object for alignment 

8739 else: 

8740 other = self._constructor(other, **self._construct_axes_dict()) 

8741 

8742 if axis is None: 

8743 axis = 0 

8744 

8745 if self.ndim == getattr(other, "ndim", 0): 

8746 align = True 

8747 else: 

8748 align = self._get_axis_number(axis) == 1 

8749 

8750 block_axis = self._get_block_manager_axis(axis) 

8751 

8752 if inplace: 

8753 # we may have different type blocks come out of putmask, so 

8754 # reconstruct the block manager 

8755 

8756 self._check_inplace_setting(other) 

8757 new_data = self._data.putmask( 

8758 mask=cond, 

8759 new=other, 

8760 align=align, 

8761 inplace=True, 

8762 axis=block_axis, 

8763 transpose=self._AXIS_REVERSED, 

8764 ) 

8765 self._update_inplace(new_data) 

8766 

8767 else: 

8768 new_data = self._data.where( 

8769 other=other, 

8770 cond=cond, 

8771 align=align, 

8772 errors=errors, 

8773 try_cast=try_cast, 

8774 axis=block_axis, 

8775 ) 

8776 

8777 return self._constructor(new_data).__finalize__(self) 

8778 

8779 _shared_docs[ 

8780 "where" 

8781 ] = """ 

8782 Replace values where the condition is %(cond_rev)s. 

8783 

8784 Parameters 

8785 ---------- 

8786 cond : bool %(klass)s, array-like, or callable 

8787 Where `cond` is %(cond)s, keep the original value. Where 

8788 %(cond_rev)s, replace with corresponding value from `other`. 

8789 If `cond` is callable, it is computed on the %(klass)s and 

8790 should return boolean %(klass)s or array. The callable must 

8791 not change input %(klass)s (though pandas doesn't check it). 

8792 other : scalar, %(klass)s, or callable 

8793 Entries where `cond` is %(cond_rev)s are replaced with 

8794 corresponding value from `other`. 

8795 If other is callable, it is computed on the %(klass)s and 

8796 should return scalar or %(klass)s. The callable must not 

8797 change input %(klass)s (though pandas doesn't check it). 

8798 inplace : bool, default False 

8799 Whether to perform the operation in place on the data. 

8800 axis : int, default None 

8801 Alignment axis if needed. 

8802 level : int, default None 

8803 Alignment level if needed. 

8804 errors : str, {'raise', 'ignore'}, default 'raise' 

8805 Note that currently this parameter won't affect 

8806 the results and will always coerce to a suitable dtype. 

8807 

8808 - 'raise' : allow exceptions to be raised. 

8809 - 'ignore' : suppress exceptions. On error return original object. 

8810 

8811 try_cast : bool, default False 

8812 Try to cast the result back to the input type (if possible). 

8813 

8814 Returns 

8815 ------- 

8816 Same type as caller 

8817 

8818 See Also 

8819 -------- 

8820 :func:`DataFrame.%(name_other)s` : Return an object of same shape as 

8821 self. 

8822 

8823 Notes 

8824 ----- 

8825 The %(name)s method is an application of the if-then idiom. For each 

8826 element in the calling DataFrame, if ``cond`` is ``%(cond)s`` the 

8827 element is used; otherwise the corresponding element from the DataFrame 

8828 ``other`` is used. 

8829 

8830 The signature for :func:`DataFrame.where` differs from 

8831 :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to 

8832 ``np.where(m, df1, df2)``. 

8833 

8834 For further details and examples see the ``%(name)s`` documentation in 

8835 :ref:`indexing <indexing.where_mask>`. 

8836 

8837 Examples 

8838 -------- 

8839 >>> s = pd.Series(range(5)) 

8840 >>> s.where(s > 0) 

8841 0 NaN 

8842 1 1.0 

8843 2 2.0 

8844 3 3.0 

8845 4 4.0 

8846 dtype: float64 

8847 

8848 >>> s.mask(s > 0) 

8849 0 0.0 

8850 1 NaN 

8851 2 NaN 

8852 3 NaN 

8853 4 NaN 

8854 dtype: float64 

8855 

8856 >>> s.where(s > 1, 10) 

8857 0 10 

8858 1 10 

8859 2 2 

8860 3 3 

8861 4 4 

8862 dtype: int64 

8863 

8864 >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B']) 

8865 >>> df 

8866 A B 

8867 0 0 1 

8868 1 2 3 

8869 2 4 5 

8870 3 6 7 

8871 4 8 9 

8872 >>> m = df %% 3 == 0 

8873 >>> df.where(m, -df) 

8874 A B 

8875 0 0 -1 

8876 1 -2 3 

8877 2 -4 -5 

8878 3 6 -7 

8879 4 -8 9 

8880 >>> df.where(m, -df) == np.where(m, df, -df) 

8881 A B 

8882 0 True True 

8883 1 True True 

8884 2 True True 

8885 3 True True 

8886 4 True True 

8887 >>> df.where(m, -df) == df.mask(~m, -df) 

8888 A B 

8889 0 True True 

8890 1 True True 

8891 2 True True 

8892 3 True True 

8893 4 True True 

8894 """ 

8895 

8896 @Appender( 

8897 _shared_docs["where"] 

8898 % dict( 

8899 _shared_doc_kwargs, 

8900 cond="True", 

8901 cond_rev="False", 

8902 name="where", 

8903 name_other="mask", 

8904 ) 

8905 ) 

8906 def where( 

8907 self, 

8908 cond, 

8909 other=np.nan, 

8910 inplace=False, 

8911 axis=None, 

8912 level=None, 

8913 errors="raise", 

8914 try_cast=False, 

8915 ): 

8916 

8917 other = com.apply_if_callable(other, self) 

8918 return self._where( 

8919 cond, other, inplace, axis, level, errors=errors, try_cast=try_cast 

8920 ) 

8921 

8922 @Appender( 

8923 _shared_docs["where"] 

8924 % dict( 

8925 _shared_doc_kwargs, 

8926 cond="False", 

8927 cond_rev="True", 

8928 name="mask", 

8929 name_other="where", 

8930 ) 

8931 ) 

8932 def mask( 

8933 self, 

8934 cond, 

8935 other=np.nan, 

8936 inplace=False, 

8937 axis=None, 

8938 level=None, 

8939 errors="raise", 

8940 try_cast=False, 

8941 ): 

8942 

8943 inplace = validate_bool_kwarg(inplace, "inplace") 

8944 cond = com.apply_if_callable(cond, self) 

8945 

8946 # see gh-21891 

8947 if not hasattr(cond, "__invert__"): 

8948 cond = np.array(cond) 

8949 

8950 return self.where( 

8951 ~cond, 

8952 other=other, 

8953 inplace=inplace, 

8954 axis=axis, 

8955 level=level, 

8956 try_cast=try_cast, 

8957 errors=errors, 

8958 ) 

8959 

8960 _shared_docs[ 

8961 "shift" 

8962 ] = """ 

8963 Shift index by desired number of periods with an optional time `freq`. 

8964 

8965 When `freq` is not passed, shift the index without realigning the data. 

8966 If `freq` is passed (in this case, the index must be date or datetime, 

8967 or it will raise a `NotImplementedError`), the index will be 

8968 increased using the periods and the `freq`. 

8969 

8970 Parameters 

8971 ---------- 

8972 periods : int 

8973 Number of periods to shift. Can be positive or negative. 

8974 freq : DateOffset, tseries.offsets, timedelta, or str, optional 

8975 Offset to use from the tseries module or time rule (e.g. 'EOM'). 

8976 If `freq` is specified then the index values are shifted but the 

8977 data is not realigned. That is, use `freq` if you would like to 

8978 extend the index when shifting and preserve the original data. 

8979 axis : {0 or 'index', 1 or 'columns', None}, default None 

8980 Shift direction. 

8981 fill_value : object, optional 

8982 The scalar value to use for newly introduced missing values. 

8983 the default depends on the dtype of `self`. 

8984 For numeric data, ``np.nan`` is used. 

8985 For datetime, timedelta, or period data, etc. :attr:`NaT` is used. 

8986 For extension dtypes, ``self.dtype.na_value`` is used. 

8987 

8988 .. versionchanged:: 0.24.0 

8989 

8990 Returns 

8991 ------- 

8992 %(klass)s 

8993 Copy of input object, shifted. 

8994 

8995 See Also 

8996 -------- 

8997 Index.shift : Shift values of Index. 

8998 DatetimeIndex.shift : Shift values of DatetimeIndex. 

8999 PeriodIndex.shift : Shift values of PeriodIndex. 

9000 tshift : Shift the time index, using the index's frequency if 

9001 available. 

9002 

9003 Examples 

9004 -------- 

9005 >>> df = pd.DataFrame({'Col1': [10, 20, 15, 30, 45], 

9006 ... 'Col2': [13, 23, 18, 33, 48], 

9007 ... 'Col3': [17, 27, 22, 37, 52]}) 

9008 

9009 >>> df.shift(periods=3) 

9010 Col1 Col2 Col3 

9011 0 NaN NaN NaN 

9012 1 NaN NaN NaN 

9013 2 NaN NaN NaN 

9014 3 10.0 13.0 17.0 

9015 4 20.0 23.0 27.0 

9016 

9017 >>> df.shift(periods=1, axis='columns') 

9018 Col1 Col2 Col3 

9019 0 NaN 10.0 13.0 

9020 1 NaN 20.0 23.0 

9021 2 NaN 15.0 18.0 

9022 3 NaN 30.0 33.0 

9023 4 NaN 45.0 48.0 

9024 

9025 >>> df.shift(periods=3, fill_value=0) 

9026 Col1 Col2 Col3 

9027 0 0 0 0 

9028 1 0 0 0 

9029 2 0 0 0 

9030 3 10 13 17 

9031 4 20 23 27 

9032 """ 

9033 

9034 @Appender(_shared_docs["shift"] % _shared_doc_kwargs) 

9035 def shift( 

9036 self: FrameOrSeries, periods=1, freq=None, axis=0, fill_value=None 

9037 ) -> FrameOrSeries: 

9038 if periods == 0: 

9039 return self.copy() 

9040 

9041 block_axis = self._get_block_manager_axis(axis) 

9042 if freq is None: 

9043 new_data = self._data.shift( 

9044 periods=periods, axis=block_axis, fill_value=fill_value 

9045 ) 

9046 else: 

9047 return self.tshift(periods, freq) 

9048 

9049 return self._constructor(new_data).__finalize__(self) 

9050 

9051 def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries: 

9052 """ 

9053 Equivalent to `shift` without copying data. 

9054 

9055 The shifted data will not include the dropped periods and the 

9056 shifted axis will be smaller than the original. 

9057 

9058 Parameters 

9059 ---------- 

9060 periods : int 

9061 Number of periods to move, can be positive or negative. 

9062 

9063 Returns 

9064 ------- 

9065 shifted : same type as caller 

9066 

9067 Notes 

9068 ----- 

9069 While the `slice_shift` is faster than `shift`, you may pay for it 

9070 later during alignment. 

9071 """ 

9072 if periods == 0: 

9073 return self 

9074 

9075 if periods > 0: 

9076 vslicer = slice(None, -periods) 

9077 islicer = slice(periods, None) 

9078 else: 

9079 vslicer = slice(-periods, None) 

9080 islicer = slice(None, periods) 

9081 

9082 new_obj = self._slice(vslicer, axis=axis) 

9083 shifted_axis = self._get_axis(axis)[islicer] 

9084 new_obj.set_axis(shifted_axis, axis=axis, inplace=True) 

9085 

9086 return new_obj.__finalize__(self) 

9087 

9088 def tshift( 

9089 self: FrameOrSeries, periods: int = 1, freq=None, axis=0 

9090 ) -> FrameOrSeries: 

9091 """ 

9092 Shift the time index, using the index's frequency if available. 

9093 

9094 Parameters 

9095 ---------- 

9096 periods : int 

9097 Number of periods to move, can be positive or negative. 

9098 freq : DateOffset, timedelta, or str, default None 

9099 Increment to use from the tseries module 

9100 or time rule expressed as a string (e.g. 'EOM'). 

9101 axis : {0 or ‘index’, 1 or ‘columns’, None}, default 0 

9102 Corresponds to the axis that contains the Index. 

9103 

9104 Returns 

9105 ------- 

9106 shifted : Series/DataFrame 

9107 

9108 Notes 

9109 ----- 

9110 If freq is not specified then tries to use the freq or inferred_freq 

9111 attributes of the index. If neither of those attributes exist, a 

9112 ValueError is thrown 

9113 """ 

9114 

9115 index = self._get_axis(axis) 

9116 if freq is None: 

9117 freq = getattr(index, "freq", None) 

9118 

9119 if freq is None: 

9120 freq = getattr(index, "inferred_freq", None) 

9121 

9122 if freq is None: 

9123 msg = "Freq was not given and was not set in the index" 

9124 raise ValueError(msg) 

9125 

9126 if periods == 0: 

9127 return self 

9128 

9129 if isinstance(freq, str): 

9130 freq = to_offset(freq) 

9131 

9132 block_axis = self._get_block_manager_axis(axis) 

9133 if isinstance(index, PeriodIndex): 

9134 orig_freq = to_offset(index.freq) 

9135 if freq == orig_freq: 

9136 new_data = self._data.copy() 

9137 new_data.axes[block_axis] = index.shift(periods) 

9138 elif orig_freq is not None: 

9139 msg = ( 

9140 f"Given freq {freq.rule_code} does not match" 

9141 f" PeriodIndex freq {orig_freq.rule_code}" 

9142 ) 

9143 raise ValueError(msg) 

9144 else: 

9145 new_data = self._data.copy() 

9146 new_data.axes[block_axis] = index.shift(periods, freq) 

9147 

9148 return self._constructor(new_data).__finalize__(self) 

9149 

9150 def truncate( 

9151 self: FrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True 

9152 ) -> FrameOrSeries: 

9153 """ 

9154 Truncate a Series or DataFrame before and after some index value. 

9155 

9156 This is a useful shorthand for boolean indexing based on index 

9157 values above or below certain thresholds. 

9158 

9159 Parameters 

9160 ---------- 

9161 before : date, str, int 

9162 Truncate all rows before this index value. 

9163 after : date, str, int 

9164 Truncate all rows after this index value. 

9165 axis : {0 or 'index', 1 or 'columns'}, optional 

9166 Axis to truncate. Truncates the index (rows) by default. 

9167 copy : bool, default is True, 

9168 Return a copy of the truncated section. 

9169 

9170 Returns 

9171 ------- 

9172 type of caller 

9173 The truncated Series or DataFrame. 

9174 

9175 See Also 

9176 -------- 

9177 DataFrame.loc : Select a subset of a DataFrame by label. 

9178 DataFrame.iloc : Select a subset of a DataFrame by position. 

9179 

9180 Notes 

9181 ----- 

9182 If the index being truncated contains only datetime values, 

9183 `before` and `after` may be specified as strings instead of 

9184 Timestamps. 

9185 

9186 Examples 

9187 -------- 

9188 >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'], 

9189 ... 'B': ['f', 'g', 'h', 'i', 'j'], 

9190 ... 'C': ['k', 'l', 'm', 'n', 'o']}, 

9191 ... index=[1, 2, 3, 4, 5]) 

9192 >>> df 

9193 A B C 

9194 1 a f k 

9195 2 b g l 

9196 3 c h m 

9197 4 d i n 

9198 5 e j o 

9199 

9200 >>> df.truncate(before=2, after=4) 

9201 A B C 

9202 2 b g l 

9203 3 c h m 

9204 4 d i n 

9205 

9206 The columns of a DataFrame can be truncated. 

9207 

9208 >>> df.truncate(before="A", after="B", axis="columns") 

9209 A B 

9210 1 a f 

9211 2 b g 

9212 3 c h 

9213 4 d i 

9214 5 e j 

9215 

9216 For Series, only rows can be truncated. 

9217 

9218 >>> df['A'].truncate(before=2, after=4) 

9219 2 b 

9220 3 c 

9221 4 d 

9222 Name: A, dtype: object 

9223 

9224 The index values in ``truncate`` can be datetimes or string 

9225 dates. 

9226 

9227 >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s') 

9228 >>> df = pd.DataFrame(index=dates, data={'A': 1}) 

9229 >>> df.tail() 

9230 A 

9231 2016-01-31 23:59:56 1 

9232 2016-01-31 23:59:57 1 

9233 2016-01-31 23:59:58 1 

9234 2016-01-31 23:59:59 1 

9235 2016-02-01 00:00:00 1 

9236 

9237 >>> df.truncate(before=pd.Timestamp('2016-01-05'), 

9238 ... after=pd.Timestamp('2016-01-10')).tail() 

9239 A 

9240 2016-01-09 23:59:56 1 

9241 2016-01-09 23:59:57 1 

9242 2016-01-09 23:59:58 1 

9243 2016-01-09 23:59:59 1 

9244 2016-01-10 00:00:00 1 

9245 

9246 Because the index is a DatetimeIndex containing only dates, we can 

9247 specify `before` and `after` as strings. They will be coerced to 

9248 Timestamps before truncation. 

9249 

9250 >>> df.truncate('2016-01-05', '2016-01-10').tail() 

9251 A 

9252 2016-01-09 23:59:56 1 

9253 2016-01-09 23:59:57 1 

9254 2016-01-09 23:59:58 1 

9255 2016-01-09 23:59:59 1 

9256 2016-01-10 00:00:00 1 

9257 

9258 Note that ``truncate`` assumes a 0 value for any unspecified time 

9259 component (midnight). This differs from partial string slicing, which 

9260 returns any partially matching dates. 

9261 

9262 >>> df.loc['2016-01-05':'2016-01-10', :].tail() 

9263 A 

9264 2016-01-10 23:59:55 1 

9265 2016-01-10 23:59:56 1 

9266 2016-01-10 23:59:57 1 

9267 2016-01-10 23:59:58 1 

9268 2016-01-10 23:59:59 1 

9269 """ 

9270 if axis is None: 

9271 axis = self._stat_axis_number 

9272 axis = self._get_axis_number(axis) 

9273 ax = self._get_axis(axis) 

9274 

9275 # GH 17935 

9276 # Check that index is sorted 

9277 if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing: 

9278 raise ValueError("truncate requires a sorted index") 

9279 

9280 # if we have a date index, convert to dates, otherwise 

9281 # treat like a slice 

9282 if ax.is_all_dates: 

9283 from pandas.core.tools.datetimes import to_datetime 

9284 

9285 before = to_datetime(before) 

9286 after = to_datetime(after) 

9287 

9288 if before is not None and after is not None: 

9289 if before > after: 

9290 raise ValueError(f"Truncate: {after} must be after {before}") 

9291 

9292 slicer = [slice(None, None)] * self._AXIS_LEN 

9293 slicer[axis] = slice(before, after) 

9294 result = self.loc[tuple(slicer)] 

9295 

9296 if isinstance(ax, MultiIndex): 

9297 setattr(result, self._get_axis_name(axis), ax.truncate(before, after)) 

9298 

9299 if copy: 

9300 result = result.copy() 

9301 

9302 return result 

9303 

9304 def tz_convert( 

9305 self: FrameOrSeries, tz, axis=0, level=None, copy: bool_t = True 

9306 ) -> FrameOrSeries: 

9307 """ 

9308 Convert tz-aware axis to target time zone. 

9309 

9310 Parameters 

9311 ---------- 

9312 tz : str or tzinfo object 

9313 axis : the axis to convert 

9314 level : int, str, default None 

9315 If axis is a MultiIndex, convert a specific level. Otherwise 

9316 must be None. 

9317 copy : bool, default True 

9318 Also make a copy of the underlying data. 

9319 

9320 Returns 

9321 ------- 

9322 %(klass)s 

9323 Object with time zone converted axis. 

9324 

9325 Raises 

9326 ------ 

9327 TypeError 

9328 If the axis is tz-naive. 

9329 """ 

9330 axis = self._get_axis_number(axis) 

9331 ax = self._get_axis(axis) 

9332 

9333 def _tz_convert(ax, tz): 

9334 if not hasattr(ax, "tz_convert"): 

9335 if len(ax) > 0: 

9336 ax_name = self._get_axis_name(axis) 

9337 raise TypeError( 

9338 f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" 

9339 ) 

9340 else: 

9341 ax = DatetimeIndex([], tz=tz) 

9342 else: 

9343 ax = ax.tz_convert(tz) 

9344 return ax 

9345 

9346 # if a level is given it must be a MultiIndex level or 

9347 # equivalent to the axis name 

9348 if isinstance(ax, MultiIndex): 

9349 level = ax._get_level_number(level) 

9350 new_level = _tz_convert(ax.levels[level], tz) 

9351 ax = ax.set_levels(new_level, level=level) 

9352 else: 

9353 if level not in (None, 0, ax.name): 

9354 raise ValueError(f"The level {level} is not valid") 

9355 ax = _tz_convert(ax, tz) 

9356 

9357 result = self._constructor(self._data, copy=copy) 

9358 result = result.set_axis(ax, axis=axis, inplace=False) 

9359 return result.__finalize__(self) 

9360 

9361 def tz_localize( 

9362 self: FrameOrSeries, 

9363 tz, 

9364 axis=0, 

9365 level=None, 

9366 copy: bool_t = True, 

9367 ambiguous="raise", 

9368 nonexistent: str = "raise", 

9369 ) -> FrameOrSeries: 

9370 """ 

9371 Localize tz-naive index of a Series or DataFrame to target time zone. 

9372 

9373 This operation localizes the Index. To localize the values in a 

9374 timezone-naive Series, use :meth:`Series.dt.tz_localize`. 

9375 

9376 Parameters 

9377 ---------- 

9378 tz : str or tzinfo 

9379 axis : the axis to localize 

9380 level : int, str, default None 

9381 If axis ia a MultiIndex, localize a specific level. Otherwise 

9382 must be None. 

9383 copy : bool, default True 

9384 Also make a copy of the underlying data. 

9385 ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' 

9386 When clocks moved backward due to DST, ambiguous times may arise. 

9387 For example in Central European Time (UTC+01), when going from 

9388 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at 

9389 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the 

9390 `ambiguous` parameter dictates how ambiguous times should be 

9391 handled. 

9392 

9393 - 'infer' will attempt to infer fall dst-transition hours based on 

9394 order 

9395 - bool-ndarray where True signifies a DST time, False designates 

9396 a non-DST time (note that this flag is only applicable for 

9397 ambiguous times) 

9398 - 'NaT' will return NaT where there are ambiguous times 

9399 - 'raise' will raise an AmbiguousTimeError if there are ambiguous 

9400 times. 

9401 nonexistent : str, default 'raise' 

9402 A nonexistent time does not exist in a particular timezone 

9403 where clocks moved forward due to DST. Valid values are: 

9404 

9405 - 'shift_forward' will shift the nonexistent time forward to the 

9406 closest existing time 

9407 - 'shift_backward' will shift the nonexistent time backward to the 

9408 closest existing time 

9409 - 'NaT' will return NaT where there are nonexistent times 

9410 - timedelta objects will shift nonexistent times by the timedelta 

9411 - 'raise' will raise an NonExistentTimeError if there are 

9412 nonexistent times. 

9413 

9414 .. versionadded:: 0.24.0 

9415 

9416 Returns 

9417 ------- 

9418 Series or DataFrame 

9419 Same type as the input. 

9420 

9421 Raises 

9422 ------ 

9423 TypeError 

9424 If the TimeSeries is tz-aware and tz is not None. 

9425 

9426 Examples 

9427 -------- 

9428 

9429 Localize local times: 

9430 

9431 >>> s = pd.Series([1], 

9432 ... index=pd.DatetimeIndex(['2018-09-15 01:30:00'])) 

9433 >>> s.tz_localize('CET') 

9434 2018-09-15 01:30:00+02:00 1 

9435 dtype: int64 

9436 

9437 Be careful with DST changes. When there is sequential data, pandas 

9438 can infer the DST time: 

9439 

9440 >>> s = pd.Series(range(7), 

9441 ... index=pd.DatetimeIndex(['2018-10-28 01:30:00', 

9442 ... '2018-10-28 02:00:00', 

9443 ... '2018-10-28 02:30:00', 

9444 ... '2018-10-28 02:00:00', 

9445 ... '2018-10-28 02:30:00', 

9446 ... '2018-10-28 03:00:00', 

9447 ... '2018-10-28 03:30:00'])) 

9448 >>> s.tz_localize('CET', ambiguous='infer') 

9449 2018-10-28 01:30:00+02:00 0 

9450 2018-10-28 02:00:00+02:00 1 

9451 2018-10-28 02:30:00+02:00 2 

9452 2018-10-28 02:00:00+01:00 3 

9453 2018-10-28 02:30:00+01:00 4 

9454 2018-10-28 03:00:00+01:00 5 

9455 2018-10-28 03:30:00+01:00 6 

9456 dtype: int64 

9457 

9458 In some cases, inferring the DST is impossible. In such cases, you can 

9459 pass an ndarray to the ambiguous parameter to set the DST explicitly 

9460 

9461 >>> s = pd.Series(range(3), 

9462 ... index=pd.DatetimeIndex(['2018-10-28 01:20:00', 

9463 ... '2018-10-28 02:36:00', 

9464 ... '2018-10-28 03:46:00'])) 

9465 >>> s.tz_localize('CET', ambiguous=np.array([True, True, False])) 

9466 2018-10-28 01:20:00+02:00 0 

9467 2018-10-28 02:36:00+02:00 1 

9468 2018-10-28 03:46:00+01:00 2 

9469 dtype: int64 

9470 

9471 If the DST transition causes nonexistent times, you can shift these 

9472 dates forward or backwards with a timedelta object or `'shift_forward'` 

9473 or `'shift_backwards'`. 

9474 >>> s = pd.Series(range(2), 

9475 ... index=pd.DatetimeIndex(['2015-03-29 02:30:00', 

9476 ... '2015-03-29 03:30:00'])) 

9477 >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward') 

9478 2015-03-29 03:00:00+02:00 0 

9479 2015-03-29 03:30:00+02:00 1 

9480 dtype: int64 

9481 >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward') 

9482 2015-03-29 01:59:59.999999999+01:00 0 

9483 2015-03-29 03:30:00+02:00 1 

9484 dtype: int64 

9485 >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) 

9486 2015-03-29 03:30:00+02:00 0 

9487 2015-03-29 03:30:00+02:00 1 

9488 dtype: int64 

9489 """ 

9490 nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") 

9491 if nonexistent not in nonexistent_options and not isinstance( 

9492 nonexistent, timedelta 

9493 ): 

9494 raise ValueError( 

9495 "The nonexistent argument must be one of 'raise', " 

9496 "'NaT', 'shift_forward', 'shift_backward' or " 

9497 "a timedelta object" 

9498 ) 

9499 

9500 axis = self._get_axis_number(axis) 

9501 ax = self._get_axis(axis) 

9502 

9503 def _tz_localize(ax, tz, ambiguous, nonexistent): 

9504 if not hasattr(ax, "tz_localize"): 

9505 if len(ax) > 0: 

9506 ax_name = self._get_axis_name(axis) 

9507 raise TypeError( 

9508 f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" 

9509 ) 

9510 else: 

9511 ax = DatetimeIndex([], tz=tz) 

9512 else: 

9513 ax = ax.tz_localize(tz, ambiguous=ambiguous, nonexistent=nonexistent) 

9514 return ax 

9515 

9516 # if a level is given it must be a MultiIndex level or 

9517 # equivalent to the axis name 

9518 if isinstance(ax, MultiIndex): 

9519 level = ax._get_level_number(level) 

9520 new_level = _tz_localize(ax.levels[level], tz, ambiguous, nonexistent) 

9521 ax = ax.set_levels(new_level, level=level) 

9522 else: 

9523 if level not in (None, 0, ax.name): 

9524 raise ValueError(f"The level {level} is not valid") 

9525 ax = _tz_localize(ax, tz, ambiguous, nonexistent) 

9526 

9527 result = self._constructor(self._data, copy=copy) 

9528 result = result.set_axis(ax, axis=axis, inplace=False) 

9529 return result.__finalize__(self) 

9530 

9531 # ---------------------------------------------------------------------- 

9532 # Numeric Methods 

9533 def abs(self: FrameOrSeries) -> FrameOrSeries: 

9534 """ 

9535 Return a Series/DataFrame with absolute numeric value of each element. 

9536 

9537 This function only applies to elements that are all numeric. 

9538 

9539 Returns 

9540 ------- 

9541 abs 

9542 Series/DataFrame containing the absolute value of each element. 

9543 

9544 See Also 

9545 -------- 

9546 numpy.absolute : Calculate the absolute value element-wise. 

9547 

9548 Notes 

9549 ----- 

9550 For ``complex`` inputs, ``1.2 + 1j``, the absolute value is 

9551 :math:`\\sqrt{ a^2 + b^2 }`. 

9552 

9553 Examples 

9554 -------- 

9555 Absolute numeric values in a Series. 

9556 

9557 >>> s = pd.Series([-1.10, 2, -3.33, 4]) 

9558 >>> s.abs() 

9559 0 1.10 

9560 1 2.00 

9561 2 3.33 

9562 3 4.00 

9563 dtype: float64 

9564 

9565 Absolute numeric values in a Series with complex numbers. 

9566 

9567 >>> s = pd.Series([1.2 + 1j]) 

9568 >>> s.abs() 

9569 0 1.56205 

9570 dtype: float64 

9571 

9572 Absolute numeric values in a Series with a Timedelta element. 

9573 

9574 >>> s = pd.Series([pd.Timedelta('1 days')]) 

9575 >>> s.abs() 

9576 0 1 days 

9577 dtype: timedelta64[ns] 

9578 

9579 Select rows with data closest to certain value using argsort (from 

9580 `StackOverflow <https://stackoverflow.com/a/17758115>`__). 

9581 

9582 >>> df = pd.DataFrame({ 

9583 ... 'a': [4, 5, 6, 7], 

9584 ... 'b': [10, 20, 30, 40], 

9585 ... 'c': [100, 50, -30, -50] 

9586 ... }) 

9587 >>> df 

9588 a b c 

9589 0 4 10 100 

9590 1 5 20 50 

9591 2 6 30 -30 

9592 3 7 40 -50 

9593 >>> df.loc[(df.c - 43).abs().argsort()] 

9594 a b c 

9595 1 5 20 50 

9596 0 4 10 100 

9597 2 6 30 -30 

9598 3 7 40 -50 

9599 """ 

9600 return np.abs(self) 

9601 

9602 def describe( 

9603 self: FrameOrSeries, percentiles=None, include=None, exclude=None 

9604 ) -> FrameOrSeries: 

9605 """ 

9606 Generate descriptive statistics. 

9607 

9608 Descriptive statistics include those that summarize the central 

9609 tendency, dispersion and shape of a 

9610 dataset's distribution, excluding ``NaN`` values. 

9611 

9612 Analyzes both numeric and object series, as well 

9613 as ``DataFrame`` column sets of mixed data types. The output 

9614 will vary depending on what is provided. Refer to the notes 

9615 below for more detail. 

9616 

9617 Parameters 

9618 ---------- 

9619 percentiles : list-like of numbers, optional 

9620 The percentiles to include in the output. All should 

9621 fall between 0 and 1. The default is 

9622 ``[.25, .5, .75]``, which returns the 25th, 50th, and 

9623 75th percentiles. 

9624 include : 'all', list-like of dtypes or None (default), optional 

9625 A white list of data types to include in the result. Ignored 

9626 for ``Series``. Here are the options: 

9627 

9628 - 'all' : All columns of the input will be included in the output. 

9629 - A list-like of dtypes : Limits the results to the 

9630 provided data types. 

9631 To limit the result to numeric types submit 

9632 ``numpy.number``. To limit it instead to object columns submit 

9633 the ``numpy.object`` data type. Strings 

9634 can also be used in the style of 

9635 ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To 

9636 select pandas categorical columns, use ``'category'`` 

9637 - None (default) : The result will include all numeric columns. 

9638 exclude : list-like of dtypes or None (default), optional, 

9639 A black list of data types to omit from the result. Ignored 

9640 for ``Series``. Here are the options: 

9641 

9642 - A list-like of dtypes : Excludes the provided data types 

9643 from the result. To exclude numeric types submit 

9644 ``numpy.number``. To exclude object columns submit the data 

9645 type ``numpy.object``. Strings can also be used in the style of 

9646 ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To 

9647 exclude pandas categorical columns, use ``'category'`` 

9648 - None (default) : The result will exclude nothing. 

9649 

9650 Returns 

9651 ------- 

9652 Series or DataFrame 

9653 Summary statistics of the Series or Dataframe provided. 

9654 

9655 See Also 

9656 -------- 

9657 DataFrame.count: Count number of non-NA/null observations. 

9658 DataFrame.max: Maximum of the values in the object. 

9659 DataFrame.min: Minimum of the values in the object. 

9660 DataFrame.mean: Mean of the values. 

9661 DataFrame.std: Standard deviation of the observations. 

9662 DataFrame.select_dtypes: Subset of a DataFrame including/excluding 

9663 columns based on their dtype. 

9664 

9665 Notes 

9666 ----- 

9667 For numeric data, the result's index will include ``count``, 

9668 ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and 

9669 upper percentiles. By default the lower percentile is ``25`` and the 

9670 upper percentile is ``75``. The ``50`` percentile is the 

9671 same as the median. 

9672 

9673 For object data (e.g. strings or timestamps), the result's index 

9674 will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` 

9675 is the most common value. The ``freq`` is the most common value's 

9676 frequency. Timestamps also include the ``first`` and ``last`` items. 

9677 

9678 If multiple object values have the highest count, then the 

9679 ``count`` and ``top`` results will be arbitrarily chosen from 

9680 among those with the highest count. 

9681 

9682 For mixed data types provided via a ``DataFrame``, the default is to 

9683 return only an analysis of numeric columns. If the dataframe consists 

9684 only of object and categorical data without any numeric columns, the 

9685 default is to return an analysis of both the object and categorical 

9686 columns. If ``include='all'`` is provided as an option, the result 

9687 will include a union of attributes of each type. 

9688 

9689 The `include` and `exclude` parameters can be used to limit 

9690 which columns in a ``DataFrame`` are analyzed for the output. 

9691 The parameters are ignored when analyzing a ``Series``. 

9692 

9693 Examples 

9694 -------- 

9695 Describing a numeric ``Series``. 

9696 

9697 >>> s = pd.Series([1, 2, 3]) 

9698 >>> s.describe() 

9699 count 3.0 

9700 mean 2.0 

9701 std 1.0 

9702 min 1.0 

9703 25% 1.5 

9704 50% 2.0 

9705 75% 2.5 

9706 max 3.0 

9707 dtype: float64 

9708 

9709 Describing a categorical ``Series``. 

9710 

9711 >>> s = pd.Series(['a', 'a', 'b', 'c']) 

9712 >>> s.describe() 

9713 count 4 

9714 unique 3 

9715 top a 

9716 freq 2 

9717 dtype: object 

9718 

9719 Describing a timestamp ``Series``. 

9720 

9721 >>> s = pd.Series([ 

9722 ... np.datetime64("2000-01-01"), 

9723 ... np.datetime64("2010-01-01"), 

9724 ... np.datetime64("2010-01-01") 

9725 ... ]) 

9726 >>> s.describe() 

9727 count 3 

9728 unique 2 

9729 top 2010-01-01 00:00:00 

9730 freq 2 

9731 first 2000-01-01 00:00:00 

9732 last 2010-01-01 00:00:00 

9733 dtype: object 

9734 

9735 Describing a ``DataFrame``. By default only numeric fields 

9736 are returned. 

9737 

9738 >>> df = pd.DataFrame({'categorical': pd.Categorical(['d','e','f']), 

9739 ... 'numeric': [1, 2, 3], 

9740 ... 'object': ['a', 'b', 'c'] 

9741 ... }) 

9742 >>> df.describe() 

9743 numeric 

9744 count 3.0 

9745 mean 2.0 

9746 std 1.0 

9747 min 1.0 

9748 25% 1.5 

9749 50% 2.0 

9750 75% 2.5 

9751 max 3.0 

9752 

9753 Describing all columns of a ``DataFrame`` regardless of data type. 

9754 

9755 >>> df.describe(include='all') 

9756 categorical numeric object 

9757 count 3 3.0 3 

9758 unique 3 NaN 3 

9759 top f NaN c 

9760 freq 1 NaN 1 

9761 mean NaN 2.0 NaN 

9762 std NaN 1.0 NaN 

9763 min NaN 1.0 NaN 

9764 25% NaN 1.5 NaN 

9765 50% NaN 2.0 NaN 

9766 75% NaN 2.5 NaN 

9767 max NaN 3.0 NaN 

9768 

9769 Describing a column from a ``DataFrame`` by accessing it as 

9770 an attribute. 

9771 

9772 >>> df.numeric.describe() 

9773 count 3.0 

9774 mean 2.0 

9775 std 1.0 

9776 min 1.0 

9777 25% 1.5 

9778 50% 2.0 

9779 75% 2.5 

9780 max 3.0 

9781 Name: numeric, dtype: float64 

9782 

9783 Including only numeric columns in a ``DataFrame`` description. 

9784 

9785 >>> df.describe(include=[np.number]) 

9786 numeric 

9787 count 3.0 

9788 mean 2.0 

9789 std 1.0 

9790 min 1.0 

9791 25% 1.5 

9792 50% 2.0 

9793 75% 2.5 

9794 max 3.0 

9795 

9796 Including only string columns in a ``DataFrame`` description. 

9797 

9798 >>> df.describe(include=[np.object]) 

9799 object 

9800 count 3 

9801 unique 3 

9802 top c 

9803 freq 1 

9804 

9805 Including only categorical columns from a ``DataFrame`` description. 

9806 

9807 >>> df.describe(include=['category']) 

9808 categorical 

9809 count 3 

9810 unique 3 

9811 top f 

9812 freq 1 

9813 

9814 Excluding numeric columns from a ``DataFrame`` description. 

9815 

9816 >>> df.describe(exclude=[np.number]) 

9817 categorical object 

9818 count 3 3 

9819 unique 3 3 

9820 top f c 

9821 freq 1 1 

9822 

9823 Excluding object columns from a ``DataFrame`` description. 

9824 

9825 >>> df.describe(exclude=[np.object]) 

9826 categorical numeric 

9827 count 3 3.0 

9828 unique 3 NaN 

9829 top f NaN 

9830 freq 1 NaN 

9831 mean NaN 2.0 

9832 std NaN 1.0 

9833 min NaN 1.0 

9834 25% NaN 1.5 

9835 50% NaN 2.0 

9836 75% NaN 2.5 

9837 max NaN 3.0 

9838 """ 

9839 if self.ndim == 2 and self.columns.size == 0: 

9840 raise ValueError("Cannot describe a DataFrame without columns") 

9841 

9842 if percentiles is not None: 

9843 # explicit conversion of `percentiles` to list 

9844 percentiles = list(percentiles) 

9845 

9846 # get them all to be in [0, 1] 

9847 validate_percentile(percentiles) 

9848 

9849 # median should always be included 

9850 if 0.5 not in percentiles: 

9851 percentiles.append(0.5) 

9852 percentiles = np.asarray(percentiles) 

9853 else: 

9854 percentiles = np.array([0.25, 0.5, 0.75]) 

9855 

9856 # sort and check for duplicates 

9857 unique_pcts = np.unique(percentiles) 

9858 if len(unique_pcts) < len(percentiles): 

9859 raise ValueError("percentiles cannot contain duplicates") 

9860 percentiles = unique_pcts 

9861 

9862 formatted_percentiles = format_percentiles(percentiles) 

9863 

9864 def describe_numeric_1d(series): 

9865 stat_index = ( 

9866 ["count", "mean", "std", "min"] + formatted_percentiles + ["max"] 

9867 ) 

9868 d = ( 

9869 [series.count(), series.mean(), series.std(), series.min()] 

9870 + series.quantile(percentiles).tolist() 

9871 + [series.max()] 

9872 ) 

9873 return pd.Series(d, index=stat_index, name=series.name) 

9874 

9875 def describe_categorical_1d(data): 

9876 names = ["count", "unique"] 

9877 objcounts = data.value_counts() 

9878 count_unique = len(objcounts[objcounts != 0]) 

9879 result = [data.count(), count_unique] 

9880 dtype = None 

9881 if result[1] > 0: 

9882 top, freq = objcounts.index[0], objcounts.iloc[0] 

9883 

9884 if is_datetime64_any_dtype(data): 

9885 tz = data.dt.tz 

9886 asint = data.dropna().values.view("i8") 

9887 top = Timestamp(top) 

9888 if top.tzinfo is not None and tz is not None: 

9889 # Don't tz_localize(None) if key is already tz-aware 

9890 top = top.tz_convert(tz) 

9891 else: 

9892 top = top.tz_localize(tz) 

9893 names += ["top", "freq", "first", "last"] 

9894 result += [ 

9895 top, 

9896 freq, 

9897 Timestamp(asint.min(), tz=tz), 

9898 Timestamp(asint.max(), tz=tz), 

9899 ] 

9900 else: 

9901 names += ["top", "freq"] 

9902 result += [top, freq] 

9903 

9904 # If the DataFrame is empty, set 'top' and 'freq' to None 

9905 # to maintain output shape consistency 

9906 else: 

9907 names += ["top", "freq"] 

9908 result += [np.nan, np.nan] 

9909 dtype = "object" 

9910 

9911 return pd.Series(result, index=names, name=data.name, dtype=dtype) 

9912 

9913 def describe_1d(data): 

9914 if is_bool_dtype(data): 

9915 return describe_categorical_1d(data) 

9916 elif is_numeric_dtype(data): 

9917 return describe_numeric_1d(data) 

9918 elif is_timedelta64_dtype(data): 

9919 return describe_numeric_1d(data) 

9920 else: 

9921 return describe_categorical_1d(data) 

9922 

9923 if self.ndim == 1: 

9924 return describe_1d(self) 

9925 elif (include is None) and (exclude is None): 

9926 # when some numerics are found, keep only numerics 

9927 data = self.select_dtypes(include=[np.number]) 

9928 if len(data.columns) == 0: 

9929 data = self 

9930 elif include == "all": 

9931 if exclude is not None: 

9932 msg = "exclude must be None when include is 'all'" 

9933 raise ValueError(msg) 

9934 data = self 

9935 else: 

9936 data = self.select_dtypes(include=include, exclude=exclude) 

9937 

9938 ldesc = [describe_1d(s) for _, s in data.items()] 

9939 # set a convenient order for rows 

9940 names: List[Optional[Hashable]] = [] 

9941 ldesc_indexes = sorted((x.index for x in ldesc), key=len) 

9942 for idxnames in ldesc_indexes: 

9943 for name in idxnames: 

9944 if name not in names: 

9945 names.append(name) 

9946 

9947 d = pd.concat([x.reindex(names, copy=False) for x in ldesc], axis=1, sort=False) 

9948 d.columns = data.columns.copy() 

9949 return d 

9950 

9951 _shared_docs[ 

9952 "pct_change" 

9953 ] = """ 

9954 Percentage change between the current and a prior element. 

9955 

9956 Computes the percentage change from the immediately previous row by 

9957 default. This is useful in comparing the percentage of change in a time 

9958 series of elements. 

9959 

9960 Parameters 

9961 ---------- 

9962 periods : int, default 1 

9963 Periods to shift for forming percent change. 

9964 fill_method : str, default 'pad' 

9965 How to handle NAs before computing percent changes. 

9966 limit : int, default None 

9967 The number of consecutive NAs to fill before stopping. 

9968 freq : DateOffset, timedelta, or str, optional 

9969 Increment to use from time series API (e.g. 'M' or BDay()). 

9970 **kwargs 

9971 Additional keyword arguments are passed into 

9972 `DataFrame.shift` or `Series.shift`. 

9973 

9974 Returns 

9975 ------- 

9976 chg : Series or DataFrame 

9977 The same type as the calling object. 

9978 

9979 See Also 

9980 -------- 

9981 Series.diff : Compute the difference of two elements in a Series. 

9982 DataFrame.diff : Compute the difference of two elements in a DataFrame. 

9983 Series.shift : Shift the index by some number of periods. 

9984 DataFrame.shift : Shift the index by some number of periods. 

9985 

9986 Examples 

9987 -------- 

9988 **Series** 

9989 

9990 >>> s = pd.Series([90, 91, 85]) 

9991 >>> s 

9992 0 90 

9993 1 91 

9994 2 85 

9995 dtype: int64 

9996 

9997 >>> s.pct_change() 

9998 0 NaN 

9999 1 0.011111 

10000 2 -0.065934 

10001 dtype: float64 

10002 

10003 >>> s.pct_change(periods=2) 

10004 0 NaN 

10005 1 NaN 

10006 2 -0.055556 

10007 dtype: float64 

10008 

10009 See the percentage change in a Series where filling NAs with last 

10010 valid observation forward to next valid. 

10011 

10012 >>> s = pd.Series([90, 91, None, 85]) 

10013 >>> s 

10014 0 90.0 

10015 1 91.0 

10016 2 NaN 

10017 3 85.0 

10018 dtype: float64 

10019 

10020 >>> s.pct_change(fill_method='ffill') 

10021 0 NaN 

10022 1 0.011111 

10023 2 0.000000 

10024 3 -0.065934 

10025 dtype: float64 

10026 

10027 **DataFrame** 

10028 

10029 Percentage change in French franc, Deutsche Mark, and Italian lira from 

10030 1980-01-01 to 1980-03-01. 

10031 

10032 >>> df = pd.DataFrame({ 

10033 ... 'FR': [4.0405, 4.0963, 4.3149], 

10034 ... 'GR': [1.7246, 1.7482, 1.8519], 

10035 ... 'IT': [804.74, 810.01, 860.13]}, 

10036 ... index=['1980-01-01', '1980-02-01', '1980-03-01']) 

10037 >>> df 

10038 FR GR IT 

10039 1980-01-01 4.0405 1.7246 804.74 

10040 1980-02-01 4.0963 1.7482 810.01 

10041 1980-03-01 4.3149 1.8519 860.13 

10042 

10043 >>> df.pct_change() 

10044 FR GR IT 

10045 1980-01-01 NaN NaN NaN 

10046 1980-02-01 0.013810 0.013684 0.006549 

10047 1980-03-01 0.053365 0.059318 0.061876 

10048 

10049 Percentage of change in GOOG and APPL stock volume. Shows computing 

10050 the percentage change between columns. 

10051 

10052 >>> df = pd.DataFrame({ 

10053 ... '2016': [1769950, 30586265], 

10054 ... '2015': [1500923, 40912316], 

10055 ... '2014': [1371819, 41403351]}, 

10056 ... index=['GOOG', 'APPL']) 

10057 >>> df 

10058 2016 2015 2014 

10059 GOOG 1769950 1500923 1371819 

10060 APPL 30586265 40912316 41403351 

10061 

10062 >>> df.pct_change(axis='columns') 

10063 2016 2015 2014 

10064 GOOG NaN -0.151997 -0.086016 

10065 APPL NaN 0.337604 0.012002 

10066 """ 

10067 

10068 @Appender(_shared_docs["pct_change"] % _shared_doc_kwargs) 

10069 def pct_change( 

10070 self: FrameOrSeries, 

10071 periods=1, 

10072 fill_method="pad", 

10073 limit=None, 

10074 freq=None, 

10075 **kwargs, 

10076 ) -> FrameOrSeries: 

10077 # TODO: Not sure if above is correct - need someone to confirm. 

10078 axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name)) 

10079 if fill_method is None: 

10080 data = self 

10081 else: 

10082 _data = self.fillna(method=fill_method, axis=axis, limit=limit) 

10083 assert _data is not None # needed for mypy 

10084 data = _data 

10085 

10086 rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1 

10087 if freq is not None: 

10088 # Shift method is implemented differently when freq is not None 

10089 # We want to restore the original index 

10090 rs = rs.loc[~rs.index.duplicated()] 

10091 rs = rs.reindex_like(data) 

10092 return rs 

10093 

10094 def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): 

10095 if axis is None: 

10096 raise ValueError("Must specify 'axis' when aggregating by level.") 

10097 grouped = self.groupby(level=level, axis=axis, sort=False) 

10098 if hasattr(grouped, name) and skipna: 

10099 return getattr(grouped, name)(**kwargs) 

10100 axis = self._get_axis_number(axis) 

10101 method = getattr(type(self), name) 

10102 applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwargs) 

10103 return grouped.aggregate(applyf) 

10104 

10105 @classmethod 

10106 def _add_numeric_operations(cls): 

10107 """ 

10108 Add the operations to the cls; evaluate the doc strings again 

10109 """ 

10110 

10111 axis_descr, name, name2 = _doc_parms(cls) 

10112 

10113 cls.any = _make_logical_function( 

10114 cls, 

10115 "any", 

10116 name, 

10117 name2, 

10118 axis_descr, 

10119 _any_desc, 

10120 nanops.nanany, 

10121 _any_see_also, 

10122 _any_examples, 

10123 empty_value=False, 

10124 ) 

10125 cls.all = _make_logical_function( 

10126 cls, 

10127 "all", 

10128 name, 

10129 name2, 

10130 axis_descr, 

10131 _all_desc, 

10132 nanops.nanall, 

10133 _all_see_also, 

10134 _all_examples, 

10135 empty_value=True, 

10136 ) 

10137 

10138 @Substitution( 

10139 desc="Return the mean absolute deviation of the values " 

10140 "for the requested axis.", 

10141 name1=name, 

10142 name2=name2, 

10143 axis_descr=axis_descr, 

10144 min_count="", 

10145 see_also="", 

10146 examples="", 

10147 ) 

10148 @Appender(_num_doc) 

10149 def mad(self, axis=None, skipna=None, level=None): 

10150 if skipna is None: 

10151 skipna = True 

10152 if axis is None: 

10153 axis = self._stat_axis_number 

10154 if level is not None: 

10155 return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna) 

10156 

10157 data = self._get_numeric_data() 

10158 if axis == 0: 

10159 demeaned = data - data.mean(axis=0) 

10160 else: 

10161 demeaned = data.sub(data.mean(axis=1), axis=0) 

10162 return np.abs(demeaned).mean(axis=axis, skipna=skipna) 

10163 

10164 cls.mad = mad 

10165 

10166 cls.sem = _make_stat_function_ddof( 

10167 cls, 

10168 "sem", 

10169 name, 

10170 name2, 

10171 axis_descr, 

10172 "Return unbiased standard error of the mean over requested " 

10173 "axis.\n\nNormalized by N-1 by default. This can be changed " 

10174 "using the ddof argument", 

10175 nanops.nansem, 

10176 ) 

10177 cls.var = _make_stat_function_ddof( 

10178 cls, 

10179 "var", 

10180 name, 

10181 name2, 

10182 axis_descr, 

10183 "Return unbiased variance over requested axis.\n\nNormalized by " 

10184 "N-1 by default. This can be changed using the ddof argument", 

10185 nanops.nanvar, 

10186 ) 

10187 cls.std = _make_stat_function_ddof( 

10188 cls, 

10189 "std", 

10190 name, 

10191 name2, 

10192 axis_descr, 

10193 "Return sample standard deviation over requested axis." 

10194 "\n\nNormalized by N-1 by default. This can be changed using the " 

10195 "ddof argument", 

10196 nanops.nanstd, 

10197 ) 

10198 

10199 cls.cummin = _make_cum_function( 

10200 cls, 

10201 "cummin", 

10202 name, 

10203 name2, 

10204 axis_descr, 

10205 "minimum", 

10206 np.minimum.accumulate, 

10207 "min", 

10208 np.inf, 

10209 np.nan, 

10210 _cummin_examples, 

10211 ) 

10212 cls.cumsum = _make_cum_function( 

10213 cls, 

10214 "cumsum", 

10215 name, 

10216 name2, 

10217 axis_descr, 

10218 "sum", 

10219 np.cumsum, 

10220 "sum", 

10221 0.0, 

10222 np.nan, 

10223 _cumsum_examples, 

10224 ) 

10225 cls.cumprod = _make_cum_function( 

10226 cls, 

10227 "cumprod", 

10228 name, 

10229 name2, 

10230 axis_descr, 

10231 "product", 

10232 np.cumprod, 

10233 "prod", 

10234 1.0, 

10235 np.nan, 

10236 _cumprod_examples, 

10237 ) 

10238 cls.cummax = _make_cum_function( 

10239 cls, 

10240 "cummax", 

10241 name, 

10242 name2, 

10243 axis_descr, 

10244 "maximum", 

10245 np.maximum.accumulate, 

10246 "max", 

10247 -np.inf, 

10248 np.nan, 

10249 _cummax_examples, 

10250 ) 

10251 

10252 cls.sum = _make_min_count_stat_function( 

10253 cls, 

10254 "sum", 

10255 name, 

10256 name2, 

10257 axis_descr, 

10258 """Return the sum of the values for the requested axis.\n 

10259 This is equivalent to the method ``numpy.sum``.""", 

10260 nanops.nansum, 

10261 _stat_func_see_also, 

10262 _sum_examples, 

10263 ) 

10264 cls.mean = _make_stat_function( 

10265 cls, 

10266 "mean", 

10267 name, 

10268 name2, 

10269 axis_descr, 

10270 "Return the mean of the values for the requested axis.", 

10271 nanops.nanmean, 

10272 ) 

10273 cls.skew = _make_stat_function( 

10274 cls, 

10275 "skew", 

10276 name, 

10277 name2, 

10278 axis_descr, 

10279 "Return unbiased skew over requested axis.\n\nNormalized by N-1.", 

10280 nanops.nanskew, 

10281 ) 

10282 cls.kurt = _make_stat_function( 

10283 cls, 

10284 "kurt", 

10285 name, 

10286 name2, 

10287 axis_descr, 

10288 "Return unbiased kurtosis over requested axis.\n\n" 

10289 "Kurtosis obtained using Fisher's definition of\n" 

10290 "kurtosis (kurtosis of normal == 0.0). Normalized " 

10291 "by N-1.", 

10292 nanops.nankurt, 

10293 ) 

10294 cls.kurtosis = cls.kurt 

10295 cls.prod = _make_min_count_stat_function( 

10296 cls, 

10297 "prod", 

10298 name, 

10299 name2, 

10300 axis_descr, 

10301 "Return the product of the values for the requested axis.", 

10302 nanops.nanprod, 

10303 examples=_prod_examples, 

10304 ) 

10305 cls.product = cls.prod 

10306 cls.median = _make_stat_function( 

10307 cls, 

10308 "median", 

10309 name, 

10310 name2, 

10311 axis_descr, 

10312 "Return the median of the values for the requested axis.", 

10313 nanops.nanmedian, 

10314 ) 

10315 cls.max = _make_stat_function( 

10316 cls, 

10317 "max", 

10318 name, 

10319 name2, 

10320 axis_descr, 

10321 """Return the maximum of the values for the requested axis.\n 

10322 If you want the *index* of the maximum, use ``idxmax``. This is 

10323 the equivalent of the ``numpy.ndarray`` method ``argmax``.""", 

10324 nanops.nanmax, 

10325 _stat_func_see_also, 

10326 _max_examples, 

10327 ) 

10328 cls.min = _make_stat_function( 

10329 cls, 

10330 "min", 

10331 name, 

10332 name2, 

10333 axis_descr, 

10334 """Return the minimum of the values for the requested axis.\n 

10335 If you want the *index* of the minimum, use ``idxmin``. This is 

10336 the equivalent of the ``numpy.ndarray`` method ``argmin``.""", 

10337 nanops.nanmin, 

10338 _stat_func_see_also, 

10339 _min_examples, 

10340 ) 

10341 

10342 @classmethod 

10343 def _add_series_or_dataframe_operations(cls): 

10344 """ 

10345 Add the series or dataframe only operations to the cls; evaluate 

10346 the doc strings again. 

10347 """ 

10348 

10349 from pandas.core.window import EWM, Expanding, Rolling, Window 

10350 

10351 @Appender(Rolling.__doc__) 

10352 def rolling( 

10353 self, 

10354 window, 

10355 min_periods=None, 

10356 center=False, 

10357 win_type=None, 

10358 on=None, 

10359 axis=0, 

10360 closed=None, 

10361 ): 

10362 axis = self._get_axis_number(axis) 

10363 

10364 if win_type is not None: 

10365 return Window( 

10366 self, 

10367 window=window, 

10368 min_periods=min_periods, 

10369 center=center, 

10370 win_type=win_type, 

10371 on=on, 

10372 axis=axis, 

10373 closed=closed, 

10374 ) 

10375 

10376 return Rolling( 

10377 self, 

10378 window=window, 

10379 min_periods=min_periods, 

10380 center=center, 

10381 win_type=win_type, 

10382 on=on, 

10383 axis=axis, 

10384 closed=closed, 

10385 ) 

10386 

10387 cls.rolling = rolling 

10388 

10389 @Appender(Expanding.__doc__) 

10390 def expanding(self, min_periods=1, center=False, axis=0): 

10391 axis = self._get_axis_number(axis) 

10392 return Expanding(self, min_periods=min_periods, center=center, axis=axis) 

10393 

10394 cls.expanding = expanding 

10395 

10396 @Appender(EWM.__doc__) 

10397 def ewm( 

10398 self, 

10399 com=None, 

10400 span=None, 

10401 halflife=None, 

10402 alpha=None, 

10403 min_periods=0, 

10404 adjust=True, 

10405 ignore_na=False, 

10406 axis=0, 

10407 ): 

10408 axis = self._get_axis_number(axis) 

10409 return EWM( 

10410 self, 

10411 com=com, 

10412 span=span, 

10413 halflife=halflife, 

10414 alpha=alpha, 

10415 min_periods=min_periods, 

10416 adjust=adjust, 

10417 ignore_na=ignore_na, 

10418 axis=axis, 

10419 ) 

10420 

10421 cls.ewm = ewm 

10422 

10423 @Appender(_shared_docs["transform"] % dict(axis="", **_shared_doc_kwargs)) 

10424 def transform(self, func, *args, **kwargs): 

10425 result = self.agg(func, *args, **kwargs) 

10426 if is_scalar(result) or len(result) != len(self): 

10427 raise ValueError("transforms cannot produce aggregated results") 

10428 

10429 return result 

10430 

10431 # ---------------------------------------------------------------------- 

10432 # Misc methods 

10433 

10434 _shared_docs[ 

10435 "valid_index" 

10436 ] = """ 

10437 Return index for %(position)s non-NA/null value. 

10438 

10439 Returns 

10440 ------- 

10441 scalar : type of index 

10442 

10443 Notes 

10444 ----- 

10445 If all elements are non-NA/null, returns None. 

10446 Also returns None for empty %(klass)s. 

10447 """ 

10448 

10449 def _find_valid_index(self, how: str): 

10450 """ 

10451 Retrieves the index of the first valid value. 

10452 

10453 Parameters 

10454 ---------- 

10455 how : {'first', 'last'} 

10456 Use this parameter to change between the first or last valid index. 

10457 

10458 Returns 

10459 ------- 

10460 idx_first_valid : type of index 

10461 """ 

10462 

10463 idxpos = find_valid_index(self._values, how) 

10464 if idxpos is None: 

10465 return None 

10466 return self.index[idxpos] 

10467 

10468 @Appender( 

10469 _shared_docs["valid_index"] % {"position": "first", "klass": "Series/DataFrame"} 

10470 ) 

10471 def first_valid_index(self): 

10472 return self._find_valid_index("first") 

10473 

10474 @Appender( 

10475 _shared_docs["valid_index"] % {"position": "last", "klass": "Series/DataFrame"} 

10476 ) 

10477 def last_valid_index(self): 

10478 return self._find_valid_index("last") 

10479 

10480 

10481def _doc_parms(cls): 

10482 """Return a tuple of the doc parms.""" 

10483 axis_descr = ( 

10484 f"{{{', '.join(f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS))}}}" 

10485 ) 

10486 name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar" 

10487 name2 = cls.__name__ 

10488 return axis_descr, name, name2 

10489 

10490 

10491_num_doc = """ 

10492%(desc)s 

10493 

10494Parameters 

10495---------- 

10496axis : %(axis_descr)s 

10497 Axis for the function to be applied on. 

10498skipna : bool, default True 

10499 Exclude NA/null values when computing the result. 

10500level : int or level name, default None 

10501 If the axis is a MultiIndex (hierarchical), count along a 

10502 particular level, collapsing into a %(name1)s. 

10503numeric_only : bool, default None 

10504 Include only float, int, boolean columns. If None, will attempt to use 

10505 everything, then use only numeric data. Not implemented for Series. 

10506%(min_count)s\ 

10507**kwargs 

10508 Additional keyword arguments to be passed to the function. 

10509 

10510Returns 

10511------- 

10512%(name1)s or %(name2)s (if level specified)\ 

10513%(see_also)s\ 

10514%(examples)s 

10515""" 

10516 

10517_num_ddof_doc = """ 

10518%(desc)s 

10519 

10520Parameters 

10521---------- 

10522axis : %(axis_descr)s 

10523skipna : bool, default True 

10524 Exclude NA/null values. If an entire row/column is NA, the result 

10525 will be NA. 

10526level : int or level name, default None 

10527 If the axis is a MultiIndex (hierarchical), count along a 

10528 particular level, collapsing into a %(name1)s. 

10529ddof : int, default 1 

10530 Delta Degrees of Freedom. The divisor used in calculations is N - ddof, 

10531 where N represents the number of elements. 

10532numeric_only : bool, default None 

10533 Include only float, int, boolean columns. If None, will attempt to use 

10534 everything, then use only numeric data. Not implemented for Series. 

10535 

10536Returns 

10537------- 

10538%(name1)s or %(name2)s (if level specified)\n""" 

10539 

10540_bool_doc = """ 

10541%(desc)s 

10542 

10543Parameters 

10544---------- 

10545axis : {0 or 'index', 1 or 'columns', None}, default 0 

10546 Indicate which axis or axes should be reduced. 

10547 

10548 * 0 / 'index' : reduce the index, return a Series whose index is the 

10549 original column labels. 

10550 * 1 / 'columns' : reduce the columns, return a Series whose index is the 

10551 original index. 

10552 * None : reduce all axes, return a scalar. 

10553 

10554bool_only : bool, default None 

10555 Include only boolean columns. If None, will attempt to use everything, 

10556 then use only boolean data. Not implemented for Series. 

10557skipna : bool, default True 

10558 Exclude NA/null values. If the entire row/column is NA and skipna is 

10559 True, then the result will be %(empty_value)s, as for an empty row/column. 

10560 If skipna is False, then NA are treated as True, because these are not 

10561 equal to zero. 

10562level : int or level name, default None 

10563 If the axis is a MultiIndex (hierarchical), count along a 

10564 particular level, collapsing into a %(name1)s. 

10565**kwargs : any, default None 

10566 Additional keywords have no effect but might be accepted for 

10567 compatibility with NumPy. 

10568 

10569Returns 

10570------- 

10571%(name1)s or %(name2)s 

10572 If level is specified, then, %(name2)s is returned; otherwise, %(name1)s 

10573 is returned. 

10574 

10575%(see_also)s 

10576%(examples)s""" 

10577 

10578_all_desc = """\ 

10579Return whether all elements are True, potentially over an axis. 

10580 

10581Returns True unless there at least one element within a series or 

10582along a Dataframe axis that is False or equivalent (e.g. zero or 

10583empty).""" 

10584 

10585_all_examples = """\ 

10586Examples 

10587-------- 

10588**Series** 

10589 

10590>>> pd.Series([True, True]).all() 

10591True 

10592>>> pd.Series([True, False]).all() 

10593False 

10594>>> pd.Series([]).all() 

10595True 

10596>>> pd.Series([np.nan]).all() 

10597True 

10598>>> pd.Series([np.nan]).all(skipna=False) 

10599True 

10600 

10601**DataFrames** 

10602 

10603Create a dataframe from a dictionary. 

10604 

10605>>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]}) 

10606>>> df 

10607 col1 col2 

106080 True True 

106091 True False 

10610 

10611Default behaviour checks if column-wise values all return True. 

10612 

10613>>> df.all() 

10614col1 True 

10615col2 False 

10616dtype: bool 

10617 

10618Specify ``axis='columns'`` to check if row-wise values all return True. 

10619 

10620>>> df.all(axis='columns') 

106210 True 

106221 False 

10623dtype: bool 

10624 

10625Or ``axis=None`` for whether every value is True. 

10626 

10627>>> df.all(axis=None) 

10628False 

10629""" 

10630 

10631_all_see_also = """\ 

10632See Also 

10633-------- 

10634Series.all : Return True if all elements are True. 

10635DataFrame.any : Return True if one (or more) elements are True. 

10636""" 

10637 

10638_cnum_doc = """ 

10639Return cumulative %(desc)s over a DataFrame or Series axis. 

10640 

10641Returns a DataFrame or Series of the same size containing the cumulative 

10642%(desc)s. 

10643 

10644Parameters 

10645---------- 

10646axis : {0 or 'index', 1 or 'columns'}, default 0 

10647 The index or the name of the axis. 0 is equivalent to None or 'index'. 

10648skipna : bool, default True 

10649 Exclude NA/null values. If an entire row/column is NA, the result 

10650 will be NA. 

10651*args, **kwargs : 

10652 Additional keywords have no effect but might be accepted for 

10653 compatibility with NumPy. 

10654 

10655Returns 

10656------- 

10657%(name1)s or %(name2)s 

10658 

10659See Also 

10660-------- 

10661core.window.Expanding.%(accum_func_name)s : Similar functionality 

10662 but ignores ``NaN`` values. 

10663%(name2)s.%(accum_func_name)s : Return the %(desc)s over 

10664 %(name2)s axis. 

10665%(name2)s.cummax : Return cumulative maximum over %(name2)s axis. 

10666%(name2)s.cummin : Return cumulative minimum over %(name2)s axis. 

10667%(name2)s.cumsum : Return cumulative sum over %(name2)s axis. 

10668%(name2)s.cumprod : Return cumulative product over %(name2)s axis. 

10669 

10670%(examples)s""" 

10671 

10672_cummin_examples = """\ 

10673Examples 

10674-------- 

10675**Series** 

10676 

10677>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

10678>>> s 

106790 2.0 

106801 NaN 

106812 5.0 

106823 -1.0 

106834 0.0 

10684dtype: float64 

10685 

10686By default, NA values are ignored. 

10687 

10688>>> s.cummin() 

106890 2.0 

106901 NaN 

106912 2.0 

106923 -1.0 

106934 -1.0 

10694dtype: float64 

10695 

10696To include NA values in the operation, use ``skipna=False`` 

10697 

10698>>> s.cummin(skipna=False) 

106990 2.0 

107001 NaN 

107012 NaN 

107023 NaN 

107034 NaN 

10704dtype: float64 

10705 

10706**DataFrame** 

10707 

10708>>> df = pd.DataFrame([[2.0, 1.0], 

10709... [3.0, np.nan], 

10710... [1.0, 0.0]], 

10711... columns=list('AB')) 

10712>>> df 

10713 A B 

107140 2.0 1.0 

107151 3.0 NaN 

107162 1.0 0.0 

10717 

10718By default, iterates over rows and finds the minimum 

10719in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

10720 

10721>>> df.cummin() 

10722 A B 

107230 2.0 1.0 

107241 2.0 NaN 

107252 1.0 0.0 

10726 

10727To iterate over columns and find the minimum in each row, 

10728use ``axis=1`` 

10729 

10730>>> df.cummin(axis=1) 

10731 A B 

107320 2.0 1.0 

107331 3.0 NaN 

107342 1.0 0.0 

10735""" 

10736 

10737_cumsum_examples = """\ 

10738Examples 

10739-------- 

10740**Series** 

10741 

10742>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

10743>>> s 

107440 2.0 

107451 NaN 

107462 5.0 

107473 -1.0 

107484 0.0 

10749dtype: float64 

10750 

10751By default, NA values are ignored. 

10752 

10753>>> s.cumsum() 

107540 2.0 

107551 NaN 

107562 7.0 

107573 6.0 

107584 6.0 

10759dtype: float64 

10760 

10761To include NA values in the operation, use ``skipna=False`` 

10762 

10763>>> s.cumsum(skipna=False) 

107640 2.0 

107651 NaN 

107662 NaN 

107673 NaN 

107684 NaN 

10769dtype: float64 

10770 

10771**DataFrame** 

10772 

10773>>> df = pd.DataFrame([[2.0, 1.0], 

10774... [3.0, np.nan], 

10775... [1.0, 0.0]], 

10776... columns=list('AB')) 

10777>>> df 

10778 A B 

107790 2.0 1.0 

107801 3.0 NaN 

107812 1.0 0.0 

10782 

10783By default, iterates over rows and finds the sum 

10784in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

10785 

10786>>> df.cumsum() 

10787 A B 

107880 2.0 1.0 

107891 5.0 NaN 

107902 6.0 1.0 

10791 

10792To iterate over columns and find the sum in each row, 

10793use ``axis=1`` 

10794 

10795>>> df.cumsum(axis=1) 

10796 A B 

107970 2.0 3.0 

107981 3.0 NaN 

107992 1.0 1.0 

10800""" 

10801 

10802_cumprod_examples = """\ 

10803Examples 

10804-------- 

10805**Series** 

10806 

10807>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

10808>>> s 

108090 2.0 

108101 NaN 

108112 5.0 

108123 -1.0 

108134 0.0 

10814dtype: float64 

10815 

10816By default, NA values are ignored. 

10817 

10818>>> s.cumprod() 

108190 2.0 

108201 NaN 

108212 10.0 

108223 -10.0 

108234 -0.0 

10824dtype: float64 

10825 

10826To include NA values in the operation, use ``skipna=False`` 

10827 

10828>>> s.cumprod(skipna=False) 

108290 2.0 

108301 NaN 

108312 NaN 

108323 NaN 

108334 NaN 

10834dtype: float64 

10835 

10836**DataFrame** 

10837 

10838>>> df = pd.DataFrame([[2.0, 1.0], 

10839... [3.0, np.nan], 

10840... [1.0, 0.0]], 

10841... columns=list('AB')) 

10842>>> df 

10843 A B 

108440 2.0 1.0 

108451 3.0 NaN 

108462 1.0 0.0 

10847 

10848By default, iterates over rows and finds the product 

10849in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

10850 

10851>>> df.cumprod() 

10852 A B 

108530 2.0 1.0 

108541 6.0 NaN 

108552 6.0 0.0 

10856 

10857To iterate over columns and find the product in each row, 

10858use ``axis=1`` 

10859 

10860>>> df.cumprod(axis=1) 

10861 A B 

108620 2.0 2.0 

108631 3.0 NaN 

108642 1.0 0.0 

10865""" 

10866 

10867_cummax_examples = """\ 

10868Examples 

10869-------- 

10870**Series** 

10871 

10872>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

10873>>> s 

108740 2.0 

108751 NaN 

108762 5.0 

108773 -1.0 

108784 0.0 

10879dtype: float64 

10880 

10881By default, NA values are ignored. 

10882 

10883>>> s.cummax() 

108840 2.0 

108851 NaN 

108862 5.0 

108873 5.0 

108884 5.0 

10889dtype: float64 

10890 

10891To include NA values in the operation, use ``skipna=False`` 

10892 

10893>>> s.cummax(skipna=False) 

108940 2.0 

108951 NaN 

108962 NaN 

108973 NaN 

108984 NaN 

10899dtype: float64 

10900 

10901**DataFrame** 

10902 

10903>>> df = pd.DataFrame([[2.0, 1.0], 

10904... [3.0, np.nan], 

10905... [1.0, 0.0]], 

10906... columns=list('AB')) 

10907>>> df 

10908 A B 

109090 2.0 1.0 

109101 3.0 NaN 

109112 1.0 0.0 

10912 

10913By default, iterates over rows and finds the maximum 

10914in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

10915 

10916>>> df.cummax() 

10917 A B 

109180 2.0 1.0 

109191 3.0 NaN 

109202 3.0 1.0 

10921 

10922To iterate over columns and find the maximum in each row, 

10923use ``axis=1`` 

10924 

10925>>> df.cummax(axis=1) 

10926 A B 

109270 2.0 2.0 

109281 3.0 NaN 

109292 1.0 1.0 

10930""" 

10931 

10932_any_see_also = """\ 

10933See Also 

10934-------- 

10935numpy.any : Numpy version of this method. 

10936Series.any : Return whether any element is True. 

10937Series.all : Return whether all elements are True. 

10938DataFrame.any : Return whether any element is True over requested axis. 

10939DataFrame.all : Return whether all elements are True over requested axis. 

10940""" 

10941 

10942_any_desc = """\ 

10943Return whether any element is True, potentially over an axis. 

10944 

10945Returns False unless there at least one element within a series or 

10946along a Dataframe axis that is True or equivalent (e.g. non-zero or 

10947non-empty).""" 

10948 

10949_any_examples = """\ 

10950Examples 

10951-------- 

10952**Series** 

10953 

10954For Series input, the output is a scalar indicating whether any element 

10955is True. 

10956 

10957>>> pd.Series([False, False]).any() 

10958False 

10959>>> pd.Series([True, False]).any() 

10960True 

10961>>> pd.Series([]).any() 

10962False 

10963>>> pd.Series([np.nan]).any() 

10964False 

10965>>> pd.Series([np.nan]).any(skipna=False) 

10966True 

10967 

10968**DataFrame** 

10969 

10970Whether each column contains at least one True element (the default). 

10971 

10972>>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) 

10973>>> df 

10974 A B C 

109750 1 0 0 

109761 2 2 0 

10977 

10978>>> df.any() 

10979A True 

10980B True 

10981C False 

10982dtype: bool 

10983 

10984Aggregating over the columns. 

10985 

10986>>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]}) 

10987>>> df 

10988 A B 

109890 True 1 

109901 False 2 

10991 

10992>>> df.any(axis='columns') 

109930 True 

109941 True 

10995dtype: bool 

10996 

10997>>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]}) 

10998>>> df 

10999 A B 

110000 True 1 

110011 False 0 

11002 

11003>>> df.any(axis='columns') 

110040 True 

110051 False 

11006dtype: bool 

11007 

11008Aggregating over the entire DataFrame with ``axis=None``. 

11009 

11010>>> df.any(axis=None) 

11011True 

11012 

11013`any` for an empty DataFrame is an empty Series. 

11014 

11015>>> pd.DataFrame([]).any() 

11016Series([], dtype: bool) 

11017""" 

11018 

11019_shared_docs[ 

11020 "stat_func_example" 

11021] = """ 

11022 

11023Examples 

11024-------- 

11025>>> idx = pd.MultiIndex.from_arrays([ 

11026... ['warm', 'warm', 'cold', 'cold'], 

11027... ['dog', 'falcon', 'fish', 'spider']], 

11028... names=['blooded', 'animal']) 

11029>>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx) 

11030>>> s 

11031blooded animal 

11032warm dog 4 

11033 falcon 2 

11034cold fish 0 

11035 spider 8 

11036Name: legs, dtype: int64 

11037 

11038>>> s.{stat_func}() 

11039{default_output} 

11040 

11041{verb} using level names, as well as indices. 

11042 

11043>>> s.{stat_func}(level='blooded') 

11044blooded 

11045warm {level_output_0} 

11046cold {level_output_1} 

11047Name: legs, dtype: int64 

11048 

11049>>> s.{stat_func}(level=0) 

11050blooded 

11051warm {level_output_0} 

11052cold {level_output_1} 

11053Name: legs, dtype: int64""" 

11054 

11055_sum_examples = _shared_docs["stat_func_example"].format( 

11056 stat_func="sum", verb="Sum", default_output=14, level_output_0=6, level_output_1=8 

11057) 

11058 

11059_sum_examples += """ 

11060 

11061By default, the sum of an empty or all-NA Series is ``0``. 

11062 

11063>>> pd.Series([]).sum() # min_count=0 is the default 

110640.0 

11065 

11066This can be controlled with the ``min_count`` parameter. For example, if 

11067you'd like the sum of an empty series to be NaN, pass ``min_count=1``. 

11068 

11069>>> pd.Series([]).sum(min_count=1) 

11070nan 

11071 

11072Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and 

11073empty series identically. 

11074 

11075>>> pd.Series([np.nan]).sum() 

110760.0 

11077 

11078>>> pd.Series([np.nan]).sum(min_count=1) 

11079nan""" 

11080 

11081_max_examples = _shared_docs["stat_func_example"].format( 

11082 stat_func="max", verb="Max", default_output=8, level_output_0=4, level_output_1=8 

11083) 

11084 

11085_min_examples = _shared_docs["stat_func_example"].format( 

11086 stat_func="min", verb="Min", default_output=0, level_output_0=2, level_output_1=0 

11087) 

11088 

11089_stat_func_see_also = """ 

11090 

11091See Also 

11092-------- 

11093Series.sum : Return the sum. 

11094Series.min : Return the minimum. 

11095Series.max : Return the maximum. 

11096Series.idxmin : Return the index of the minimum. 

11097Series.idxmax : Return the index of the maximum. 

11098DataFrame.sum : Return the sum over the requested axis. 

11099DataFrame.min : Return the minimum over the requested axis. 

11100DataFrame.max : Return the maximum over the requested axis. 

11101DataFrame.idxmin : Return the index of the minimum over the requested axis. 

11102DataFrame.idxmax : Return the index of the maximum over the requested axis.""" 

11103 

11104_prod_examples = """ 

11105 

11106Examples 

11107-------- 

11108By default, the product of an empty or all-NA Series is ``1`` 

11109 

11110>>> pd.Series([]).prod() 

111111.0 

11112 

11113This can be controlled with the ``min_count`` parameter 

11114 

11115>>> pd.Series([]).prod(min_count=1) 

11116nan 

11117 

11118Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and 

11119empty series identically. 

11120 

11121>>> pd.Series([np.nan]).prod() 

111221.0 

11123 

11124>>> pd.Series([np.nan]).prod(min_count=1) 

11125nan""" 

11126 

11127_min_count_stub = """\ 

11128min_count : int, default 0 

11129 The required number of valid values to perform the operation. If fewer than 

11130 ``min_count`` non-NA values are present the result will be NA. 

11131 

11132 .. versionadded:: 0.22.0 

11133 

11134 Added with the default being 0. This means the sum of an all-NA 

11135 or empty Series is 0, and the product of an all-NA or empty 

11136 Series is 1. 

11137""" 

11138 

11139 

11140def _make_min_count_stat_function( 

11141 cls, name, name1, name2, axis_descr, desc, f, see_also: str = "", examples: str = "" 

11142): 

11143 @Substitution( 

11144 desc=desc, 

11145 name1=name1, 

11146 name2=name2, 

11147 axis_descr=axis_descr, 

11148 min_count=_min_count_stub, 

11149 see_also=see_also, 

11150 examples=examples, 

11151 ) 

11152 @Appender(_num_doc) 

11153 def stat_func( 

11154 self, 

11155 axis=None, 

11156 skipna=None, 

11157 level=None, 

11158 numeric_only=None, 

11159 min_count=0, 

11160 **kwargs, 

11161 ): 

11162 if name == "sum": 

11163 nv.validate_sum(tuple(), kwargs) 

11164 elif name == "prod": 

11165 nv.validate_prod(tuple(), kwargs) 

11166 else: 

11167 nv.validate_stat_func(tuple(), kwargs, fname=name) 

11168 if skipna is None: 

11169 skipna = True 

11170 if axis is None: 

11171 axis = self._stat_axis_number 

11172 if level is not None: 

11173 return self._agg_by_level( 

11174 name, axis=axis, level=level, skipna=skipna, min_count=min_count 

11175 ) 

11176 return self._reduce( 

11177 f, 

11178 name, 

11179 axis=axis, 

11180 skipna=skipna, 

11181 numeric_only=numeric_only, 

11182 min_count=min_count, 

11183 ) 

11184 

11185 return set_function_name(stat_func, name, cls) 

11186 

11187 

11188def _make_stat_function( 

11189 cls, name, name1, name2, axis_descr, desc, f, see_also: str = "", examples: str = "" 

11190): 

11191 @Substitution( 

11192 desc=desc, 

11193 name1=name1, 

11194 name2=name2, 

11195 axis_descr=axis_descr, 

11196 min_count="", 

11197 see_also=see_also, 

11198 examples=examples, 

11199 ) 

11200 @Appender(_num_doc) 

11201 def stat_func( 

11202 self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs 

11203 ): 

11204 if name == "median": 

11205 nv.validate_median(tuple(), kwargs) 

11206 else: 

11207 nv.validate_stat_func(tuple(), kwargs, fname=name) 

11208 if skipna is None: 

11209 skipna = True 

11210 if axis is None: 

11211 axis = self._stat_axis_number 

11212 if level is not None: 

11213 return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) 

11214 return self._reduce( 

11215 f, name, axis=axis, skipna=skipna, numeric_only=numeric_only 

11216 ) 

11217 

11218 return set_function_name(stat_func, name, cls) 

11219 

11220 

11221def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f): 

11222 @Substitution(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) 

11223 @Appender(_num_ddof_doc) 

11224 def stat_func( 

11225 self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs 

11226 ): 

11227 nv.validate_stat_ddof_func(tuple(), kwargs, fname=name) 

11228 if skipna is None: 

11229 skipna = True 

11230 if axis is None: 

11231 axis = self._stat_axis_number 

11232 if level is not None: 

11233 return self._agg_by_level( 

11234 name, axis=axis, level=level, skipna=skipna, ddof=ddof 

11235 ) 

11236 return self._reduce( 

11237 f, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof 

11238 ) 

11239 

11240 return set_function_name(stat_func, name, cls) 

11241 

11242 

11243def _make_cum_function( 

11244 cls, 

11245 name, 

11246 name1, 

11247 name2, 

11248 axis_descr, 

11249 desc, 

11250 accum_func, 

11251 accum_func_name, 

11252 mask_a, 

11253 mask_b, 

11254 examples, 

11255): 

11256 @Substitution( 

11257 desc=desc, 

11258 name1=name1, 

11259 name2=name2, 

11260 axis_descr=axis_descr, 

11261 accum_func_name=accum_func_name, 

11262 examples=examples, 

11263 ) 

11264 @Appender(_cnum_doc) 

11265 def cum_func(self, axis=None, skipna=True, *args, **kwargs): 

11266 skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) 

11267 if axis is None: 

11268 axis = self._stat_axis_number 

11269 else: 

11270 axis = self._get_axis_number(axis) 

11271 

11272 if axis == 1: 

11273 return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T 

11274 

11275 def na_accum_func(blk_values): 

11276 # We will be applying this function to block values 

11277 if blk_values.dtype.kind in ["m", "M"]: 

11278 # GH#30460, GH#29058 

11279 # numpy 1.18 started sorting NaTs at the end instead of beginning, 

11280 # so we need to work around to maintain backwards-consistency. 

11281 orig_dtype = blk_values.dtype 

11282 

11283 # We need to define mask before masking NaTs 

11284 mask = isna(blk_values) 

11285 

11286 if accum_func == np.minimum.accumulate: 

11287 # Note: the accum_func comparison fails as an "is" comparison 

11288 y = blk_values.view("i8") 

11289 y[mask] = np.iinfo(np.int64).max 

11290 changed = True 

11291 else: 

11292 y = blk_values 

11293 changed = False 

11294 

11295 result = accum_func(y.view("i8"), axis) 

11296 if skipna: 

11297 np.putmask(result, mask, iNaT) 

11298 elif accum_func == np.minimum.accumulate: 

11299 # Restore NaTs that we masked previously 

11300 nz = (~np.asarray(mask)).nonzero()[0] 

11301 if len(nz): 

11302 # everything up to the first non-na entry stays NaT 

11303 result[: nz[0]] = iNaT 

11304 

11305 if changed: 

11306 # restore NaT elements 

11307 y[mask] = iNaT # TODO: could try/finally for this? 

11308 

11309 if isinstance(blk_values, np.ndarray): 

11310 result = result.view(orig_dtype) 

11311 else: 

11312 # DatetimeArray 

11313 result = type(blk_values)._from_sequence(result, dtype=orig_dtype) 

11314 

11315 elif skipna and not issubclass( 

11316 blk_values.dtype.type, (np.integer, np.bool_) 

11317 ): 

11318 vals = blk_values.copy().T 

11319 mask = isna(vals) 

11320 np.putmask(vals, mask, mask_a) 

11321 result = accum_func(vals, axis) 

11322 np.putmask(result, mask, mask_b) 

11323 else: 

11324 result = accum_func(blk_values.T, axis) 

11325 

11326 # transpose back for ndarray, not for EA 

11327 return result.T if hasattr(result, "T") else result 

11328 

11329 result = self._data.apply(na_accum_func) 

11330 

11331 d = self._construct_axes_dict() 

11332 d["copy"] = False 

11333 return self._constructor(result, **d).__finalize__(self) 

11334 

11335 return set_function_name(cum_func, name, cls) 

11336 

11337 

11338def _make_logical_function( 

11339 cls, name, name1, name2, axis_descr, desc, f, see_also, examples, empty_value 

11340): 

11341 @Substitution( 

11342 desc=desc, 

11343 name1=name1, 

11344 name2=name2, 

11345 axis_descr=axis_descr, 

11346 see_also=see_also, 

11347 examples=examples, 

11348 empty_value=empty_value, 

11349 ) 

11350 @Appender(_bool_doc) 

11351 def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): 

11352 nv.validate_logical_func(tuple(), kwargs, fname=name) 

11353 if level is not None: 

11354 if bool_only is not None: 

11355 raise NotImplementedError( 

11356 "Option bool_only is not implemented with option level." 

11357 ) 

11358 return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) 

11359 return self._reduce( 

11360 f, 

11361 name, 

11362 axis=axis, 

11363 skipna=skipna, 

11364 numeric_only=bool_only, 

11365 filter_type="bool", 

11366 ) 

11367 

11368 return set_function_name(logical_func, name, cls)