Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from collections import defaultdict 

2from functools import partial 

3import itertools 

4import operator 

5import re 

6from typing import List, Optional, Sequence, Tuple, Union 

7 

8import numpy as np 

9 

10from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib 

11from pandas.util._validators import validate_bool_kwarg 

12 

13from pandas.core.dtypes.cast import ( 

14 find_common_type, 

15 infer_dtype_from_scalar, 

16 maybe_convert_objects, 

17 maybe_promote, 

18) 

19from pandas.core.dtypes.common import ( 

20 _NS_DTYPE, 

21 is_datetimelike_v_numeric, 

22 is_extension_array_dtype, 

23 is_list_like, 

24 is_numeric_v_string_like, 

25 is_scalar, 

26 is_sparse, 

27) 

28from pandas.core.dtypes.concat import concat_compat 

29from pandas.core.dtypes.dtypes import ExtensionDtype 

30from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries 

31from pandas.core.dtypes.missing import isna 

32 

33import pandas.core.algorithms as algos 

34from pandas.core.base import PandasObject 

35from pandas.core.indexers import maybe_convert_indices 

36from pandas.core.indexes.api import Index, MultiIndex, ensure_index 

37from pandas.core.internals.blocks import ( 

38 Block, 

39 CategoricalBlock, 

40 DatetimeTZBlock, 

41 ExtensionBlock, 

42 ObjectValuesExtensionBlock, 

43 _extend_blocks, 

44 _merge_blocks, 

45 _safe_reshape, 

46 get_block_type, 

47 make_block, 

48) 

49from pandas.core.internals.concat import ( # all for concatenate_block_managers 

50 combine_concat_plans, 

51 concatenate_join_units, 

52 get_mgr_concatenation_plan, 

53 is_uniform_join_units, 

54) 

55 

56from pandas.io.formats.printing import pprint_thing 

57 

58# TODO: flexible with index=None and/or items=None 

59 

60 

61class BlockManager(PandasObject): 

62 """ 

63 Core internal data structure to implement DataFrame, Series, etc. 

64 

65 Manage a bunch of labeled 2D mixed-type ndarrays. Essentially it's a 

66 lightweight blocked set of labeled data to be manipulated by the DataFrame 

67 public API class 

68 

69 Attributes 

70 ---------- 

71 shape 

72 ndim 

73 axes 

74 values 

75 items 

76 

77 Methods 

78 ------- 

79 set_axis(axis, new_labels) 

80 copy(deep=True) 

81 

82 get_dtype_counts 

83 get_dtypes 

84 

85 apply(func, axes, block_filter_fn) 

86 

87 get_bool_data 

88 get_numeric_data 

89 

90 get_slice(slice_like, axis) 

91 get(label) 

92 iget(loc) 

93 

94 take(indexer, axis) 

95 reindex_axis(new_labels, axis) 

96 reindex_indexer(new_labels, indexer, axis) 

97 

98 delete(label) 

99 insert(loc, label, value) 

100 set(label, value) 

101 

102 Parameters 

103 ---------- 

104 

105 

106 Notes 

107 ----- 

108 This is *not* a public API class 

109 """ 

110 

111 __slots__ = [ 

112 "axes", 

113 "blocks", 

114 "_ndim", 

115 "_shape", 

116 "_known_consolidated", 

117 "_is_consolidated", 

118 "_blknos", 

119 "_blklocs", 

120 ] 

121 

    def __init__(
        self,
        blocks: Sequence[Block],
        axes: Sequence[Index],
        do_integrity_check: bool = True,
    ):
        """
        Parameters
        ----------
        blocks : sequence of Block
        axes : sequence of Index
            One Index per dimension; ``len(axes)`` defines ``self.ndim``.
        do_integrity_check : bool, default True
            If True, verify block shapes/placements are consistent with axes.
        """
        self.axes = [ensure_index(ax) for ax in axes]
        self.blocks: Tuple[Block, ...] = tuple(blocks)

        # Every block must carry the same dimensionality as the manager.
        for block in blocks:
            if self.ndim != block.ndim:
                raise AssertionError(
                    f"Number of Block dimensions ({block.ndim}) must equal "
                    f"number of axes ({self.ndim})"
                )

        if do_integrity_check:
            self._verify_integrity()

        # Initialize the consolidation-state cache and item->block maps.
        self._consolidate_check()

        self._rebuild_blknos_and_blklocs()

144 

145 def make_empty(self, axes=None): 

146 """ return an empty BlockManager with the items axis of len 0 """ 

147 if axes is None: 

148 axes = [ensure_index([])] + [ensure_index(a) for a in self.axes[1:]] 

149 

150 # preserve dtype if possible 

151 if self.ndim == 1: 

152 blocks = np.array([], dtype=self.array_dtype) 

153 else: 

154 blocks = [] 

155 return type(self)(blocks, axes) 

156 

    def __nonzero__(self):
        # A BlockManager is always truthy, even when it holds no items.
        return True

    # Python3 compat
    __bool__ = __nonzero__

162 

163 @property 

164 def shape(self): 

165 return tuple(len(ax) for ax in self.axes) 

166 

    @property
    def ndim(self) -> int:
        # Dimensionality is implied by the number of axes.
        return len(self.axes)

170 

171 def set_axis(self, axis, new_labels): 

172 new_labels = ensure_index(new_labels) 

173 old_len = len(self.axes[axis]) 

174 new_len = len(new_labels) 

175 

176 if new_len != old_len: 

177 raise ValueError( 

178 f"Length mismatch: Expected axis has {old_len} elements, new " 

179 f"values have {new_len} elements" 

180 ) 

181 

182 self.axes[axis] = new_labels 

183 

184 def rename_axis(self, mapper, axis, copy=True, level=None): 

185 """ 

186 Rename one of axes. 

187 

188 Parameters 

189 ---------- 

190 mapper : unary callable 

191 axis : int 

192 copy : boolean, default True 

193 level : int, default None 

194 """ 

195 obj = self.copy(deep=copy) 

196 obj.set_axis(axis, _transform_index(self.axes[axis], mapper, level)) 

197 return obj 

198 

199 @property 

200 def _is_single_block(self): 

201 if self.ndim == 1: 

202 return True 

203 

204 if len(self.blocks) != 1: 

205 return False 

206 

207 blk = self.blocks[0] 

208 return blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice == slice( 

209 0, len(self), 1 

210 ) 

211 

    def _rebuild_blknos_and_blklocs(self):
        """
        Update mgr._blknos / mgr._blklocs.
        """
        # _blknos[i]  -> index of the block holding item i
        # _blklocs[i] -> position of item i within that block
        new_blknos = np.empty(self.shape[0], dtype=np.int64)
        new_blklocs = np.empty(self.shape[0], dtype=np.int64)
        # -1 marks "not yet assigned"; any survivor below indicates a gap.
        new_blknos.fill(-1)
        new_blklocs.fill(-1)

        for blkno, blk in enumerate(self.blocks):
            rl = blk.mgr_locs
            new_blknos[rl.indexer] = blkno
            new_blklocs[rl.indexer] = np.arange(len(rl))

        if (new_blknos == -1).any():
            # some item positions are not covered by any block
            raise AssertionError("Gaps in blk ref_locs")

        self._blknos = new_blknos
        self._blklocs = new_blklocs

231 

    @property
    def items(self):
        # Axis 0 holds the item (column) labels.
        return self.axes[0]

235 

236 def _get_counts(self, f): 

237 """ return a dict of the counts of the function in BlockManager """ 

238 self._consolidate_inplace() 

239 counts = dict() 

240 for b in self.blocks: 

241 v = f(b) 

242 counts[v] = counts.get(v, 0) + b.shape[0] 

243 return counts 

244 

    def get_dtype_counts(self):
        # Map dtype name -> number of items with that dtype.
        return self._get_counts(lambda b: b.dtype.name)

247 

    def get_dtypes(self):
        # One dtype per item, in item order, resolved through the blkno map.
        dtypes = np.array([blk.dtype for blk in self.blocks])
        return algos.take_1d(dtypes, self._blknos, allow_fill=False)

251 

252 def __getstate__(self): 

253 block_values = [b.values for b in self.blocks] 

254 block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks] 

255 axes_array = list(self.axes) 

256 

257 extra_state = { 

258 "0.14.1": { 

259 "axes": axes_array, 

260 "blocks": [ 

261 dict(values=b.values, mgr_locs=b.mgr_locs.indexer) 

262 for b in self.blocks 

263 ], 

264 } 

265 } 

266 

267 # First three elements of the state are to maintain forward 

268 # compatibility with 0.13.1. 

269 return axes_array, block_values, block_items, extra_state 

270 

    def __setstate__(self, state):
        # Rebuild a Block from pickled values + placement.
        def unpickle_block(values, mgr_locs):
            return make_block(values, placement=mgr_locs)

        if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
            # Post-0.14.1 pickle: everything needed lives in the extras dict.
            state = state[3]["0.14.1"]
            self.axes = [ensure_index(ax) for ax in state["axes"]]
            self.blocks = tuple(
                unpickle_block(b["values"], b["mgr_locs"]) for b in state["blocks"]
            )
        else:
            # discard anything after 3rd, support beta pickling format for a
            # little while longer
            ax_arrays, bvalues, bitems = state[:3]

            self.axes = [ensure_index(ax) for ax in ax_arrays]

            if len(bitems) == 1 and self.axes[0].equals(bitems[0]):
                # This is a workaround for pre-0.14.1 pickles that didn't
                # support unpickling multi-block frames/panels with non-unique
                # columns/items, because given a manager with items ["a", "b",
                # "a"] there's no way of knowing which block's "a" is where.
                #
                # Single-block case can be supported under the assumption that
                # block items corresponded to manager items 1-to-1.
                all_mgr_locs = [slice(0, len(bitems[0]))]
            else:
                all_mgr_locs = [
                    self.axes[0].get_indexer(blk_items) for blk_items in bitems
                ]

            self.blocks = tuple(
                unpickle_block(values, mgr_locs)
                for values, mgr_locs in zip(bvalues, all_mgr_locs)
            )

        # Reset caches that are not part of the pickled state.
        self._post_setstate()

308 

    def _post_setstate(self):
        # Consolidation state is unknown after unpickling; recompute lazily.
        self._is_consolidated = False
        self._known_consolidated = False
        self._rebuild_blknos_and_blklocs()

313 

    def __len__(self) -> int:
        # Length is the number of items (axis-0 labels).
        return len(self.items)

316 

317 def __repr__(self) -> str: 

318 output = type(self).__name__ 

319 for i, ax in enumerate(self.axes): 

320 if i == 0: 

321 output += f"\nItems: {ax}" 

322 else: 

323 output += f"\nAxis {i}: {ax}" 

324 

325 for block in self.blocks: 

326 output += f"\n{pprint_thing(block)}" 

327 return output 

328 

329 def _verify_integrity(self): 

330 mgr_shape = self.shape 

331 tot_items = sum(len(x.mgr_locs) for x in self.blocks) 

332 for block in self.blocks: 

333 if block._verify_integrity and block.shape[1:] != mgr_shape[1:]: 

334 construction_error(tot_items, block.shape[1:], self.axes) 

335 if len(self.items) != tot_items: 

336 raise AssertionError( 

337 "Number of manager items must equal union of " 

338 f"block items\n# manager items: {len(self.items)}, # " 

339 f"tot_items: {tot_items}" 

340 ) 

341 

    def reduce(self, func, *args, **kwargs):
        # If 2D, we assume that we're operating column-wise
        if self.ndim == 1:
            # we'll be returning a scalar
            blk = self.blocks[0]
            return func(blk.values, *args, **kwargs)

        # Map item position -> reduced value, accumulated over all blocks.
        res = {}
        for blk in self.blocks:
            bres = func(blk.values, *args, **kwargs)

            if np.ndim(bres) == 0:
                # EA
                assert blk.shape[0] == 1
                new_res = zip(blk.mgr_locs.as_array, [bres])
            else:
                assert bres.ndim == 1, bres.shape
                assert blk.shape[0] == len(bres), (blk.shape, bres.shape, args, kwargs)
                new_res = zip(blk.mgr_locs.as_array, bres)

            nr = dict(new_res)
            # blocks must not overlap in the item positions they cover
            assert not any(key in res for key in nr)
            res.update(nr)

        return res

367 

    def apply(self, f, filter=None, **kwargs):
        """
        Iterate over the blocks, collect and create a new BlockManager.

        Parameters
        ----------
        f : str or callable
            Name of the Block method to apply.
        filter : list, if supplied, only call the block if the filter is in
            the block

        Returns
        -------
        BlockManager
        """

        result_blocks = []

        # filter kwarg is used in replace-* family of methods
        if filter is not None:
            filter_locs = set(self.items.get_indexer_for(filter))
            if len(filter_locs) == len(self.items):
                # All items are included, as if there were no filtering
                filter = None
            else:
                kwargs["filter"] = filter_locs

        self._consolidate_inplace()

        # Determine which kwargs must be re-aligned to each block's items
        # before dispatch, and whether reindexing should copy.
        if f == "where":
            align_copy = True
            if kwargs.get("align", True):
                align_keys = ["other", "cond"]
            else:
                align_keys = ["cond"]
        elif f == "putmask":
            align_copy = False
            if kwargs.get("align", True):
                align_keys = ["new", "mask"]
            else:
                align_keys = ["mask"]
        elif f == "fillna":
            # fillna internally does putmask, maybe it's better to do this
            # at mgr, not block level?
            align_copy = False
            align_keys = ["value"]
        else:
            align_keys = []

        # TODO(EA): may interfere with ExtensionBlock.setitem for blocks
        # with a .values attribute.
        aligned_args = {
            k: kwargs[k]
            for k in align_keys
            if not isinstance(kwargs[k], ABCExtensionArray)
            and hasattr(kwargs[k], "values")
        }

        for b in self.blocks:
            if filter is not None:
                # Blocks with no filtered items pass through unchanged.
                if not b.mgr_locs.isin(filter_locs).any():
                    result_blocks.append(b)
                    continue

            if aligned_args:
                b_items = self.items[b.mgr_locs.indexer]

                for k, obj in aligned_args.items():
                    axis = obj._info_axis_number
                    kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)

            # f may be a Block method name or a callable applied via Block.apply.
            if callable(f):
                applied = b.apply(f, **kwargs)
            else:
                applied = getattr(b, f)(**kwargs)
            result_blocks = _extend_blocks(applied, result_blocks)

        if len(result_blocks) == 0:
            return self.make_empty(self.axes)
        bm = type(self)(result_blocks, self.axes, do_integrity_check=False)
        return bm

449 

    def quantile(
        self,
        axis=0,
        consolidate=True,
        transposed=False,
        interpolation="linear",
        qs=None,
        numeric_only=None,
    ):
        """
        Iterate over blocks applying quantile reduction.
        This routine is intended for reduction type operations and
        will do inference on the generated blocks.

        Parameters
        ----------
        axis: reduction axis, default 0
        consolidate: boolean, default True. Join together blocks having same
            dtype
        transposed: boolean, default False
            we are holding transposed data
        interpolation : type of interpolation, default 'linear'
        qs : a scalar or list of the quantiles to be computed
        numeric_only : ignored

        Returns
        -------
        Block Manager (new object)
        """

        # Series dispatches to DataFrame for quantile, which allows us to
        # simplify some of the code here and in the blocks
        assert self.ndim >= 2

        if consolidate:
            self._consolidate_inplace()

        def get_axe(block, qs, axes):
            # Because Series dispatches to DataFrame, we will always have
            # block.ndim == 2
            from pandas import Float64Index

            # List-like qs produce one row per quantile; scalar keeps axis 0.
            if is_list_like(qs):
                ax = Float64Index(qs)
            else:
                ax = axes[0]
            return ax

        axes, blocks = [], []
        for b in self.blocks:
            block = b.quantile(axis=axis, qs=qs, interpolation=interpolation)

            axe = get_axe(b, qs, axes=self.axes)

            axes.append(axe)
            blocks.append(block)

        # note that some DatetimeTZ, Categorical are always ndim==1
        ndim = {b.ndim for b in blocks}
        assert 0 not in ndim, ndim

        if 2 in ndim:

            new_axes = list(self.axes)

            # multiple blocks that are reduced
            if len(blocks) > 1:
                new_axes[1] = axes[0]

                # reset the placement to the original
                for b, sb in zip(blocks, self.blocks):
                    b.mgr_locs = sb.mgr_locs

            else:
                new_axes[axis] = Index(np.concatenate([ax.values for ax in axes]))

            if transposed:
                new_axes = new_axes[::-1]
                blocks = [
                    b.make_block(b.values.T, placement=np.arange(b.shape[1]))
                    for b in blocks
                ]

            return type(self)(blocks, new_axes)

        # single block, i.e. ndim == {1}
        values = concat_compat([b.values for b in blocks])

        # compute the orderings of our original data
        if len(self.blocks) > 1:

            # Invert block order back into original item order.
            indexer = np.empty(len(self.axes[0]), dtype=np.intp)
            i = 0
            for b in self.blocks:
                for j in b.mgr_locs:
                    indexer[j] = i
                    i = i + 1

            values = values.take(indexer)

        return SingleBlockManager(
            [make_block(values, ndim=1, placement=np.arange(len(values)))], axes[0]
        )

553 

    def isna(self, func):
        # Apply the elementwise na-detection callable via Block.apply.
        return self.apply("apply", func=func)

556 

    def where(self, **kwargs):
        # Dispatch to Block.where across all blocks.
        return self.apply("where", **kwargs)

559 

    def setitem(self, **kwargs):
        # Dispatch to Block.setitem across all blocks.
        return self.apply("setitem", **kwargs)

562 

    def putmask(self, **kwargs):
        # Dispatch to Block.putmask across all blocks.
        return self.apply("putmask", **kwargs)

565 

    def diff(self, **kwargs):
        # Dispatch to Block.diff across all blocks.
        return self.apply("diff", **kwargs)

568 

    def interpolate(self, **kwargs):
        # Dispatch to Block.interpolate across all blocks.
        return self.apply("interpolate", **kwargs)

571 

    def shift(self, **kwargs):
        # Dispatch to Block.shift across all blocks.
        return self.apply("shift", **kwargs)

574 

    def fillna(self, **kwargs):
        # Dispatch to Block.fillna across all blocks.
        return self.apply("fillna", **kwargs)

577 

    def downcast(self, **kwargs):
        # Dispatch to Block.downcast across all blocks.
        return self.apply("downcast", **kwargs)

580 

    def astype(self, dtype, copy: bool = False, errors: str = "raise"):
        # Cast every block; error semantics are handled at the block level.
        return self.apply("astype", dtype=dtype, copy=copy, errors=errors)

583 

    def convert(self, **kwargs):
        # Dispatch to Block.convert across all blocks.
        return self.apply("convert", **kwargs)

586 

    def replace(self, value, **kwargs):
        # Only scalar replacement values are supported here; list-likes go
        # through replace_list instead.
        assert np.ndim(value) == 0, value
        return self.apply("replace", value=value, **kwargs)

590 

591 def replace_list(self, src_list, dest_list, inplace=False, regex=False): 

592 """ do a list replace """ 

593 

594 inplace = validate_bool_kwarg(inplace, "inplace") 

595 

596 # figure out our mask a-priori to avoid repeated replacements 

597 values = self.as_array() 

598 

599 def comp(s, regex=False): 

600 """ 

601 Generate a bool array by perform an equality check, or perform 

602 an element-wise regular expression matching 

603 """ 

604 if isna(s): 

605 return isna(values) 

606 if isinstance(s, (Timedelta, Timestamp)) and getattr(s, "tz", None) is None: 

607 

608 return _compare_or_regex_search( 

609 maybe_convert_objects(values), s.asm8, regex 

610 ) 

611 return _compare_or_regex_search(values, s, regex) 

612 

613 masks = [comp(s, regex) for i, s in enumerate(src_list)] 

614 

615 result_blocks = [] 

616 src_len = len(src_list) - 1 

617 for blk in self.blocks: 

618 

619 # its possible to get multiple result blocks here 

620 # replace ALWAYS will return a list 

621 rb = [blk if inplace else blk.copy()] 

622 for i, (s, d) in enumerate(zip(src_list, dest_list)): 

623 # TODO: assert/validate that `d` is always a scalar? 

624 new_rb = [] 

625 for b in rb: 

626 m = masks[i][b.mgr_locs.indexer] 

627 convert = i == src_len 

628 result = b._replace_coerce( 

629 mask=m, 

630 to_replace=s, 

631 value=d, 

632 inplace=inplace, 

633 convert=convert, 

634 regex=regex, 

635 ) 

636 if m.any() or convert: 

637 new_rb = _extend_blocks(result, new_rb) 

638 else: 

639 new_rb.append(b) 

640 rb = new_rb 

641 result_blocks.extend(rb) 

642 

643 bm = type(self)(result_blocks, self.axes) 

644 bm._consolidate_inplace() 

645 return bm 

646 

    def is_consolidated(self):
        """
        Return True if more than one block with the same dtype
        """
        # Lazily recompute only when the cached answer has been invalidated.
        if not self._known_consolidated:
            self._consolidate_check()
        return self._is_consolidated

654 

655 def _consolidate_check(self): 

656 ftypes = [blk.ftype for blk in self.blocks] 

657 self._is_consolidated = len(ftypes) == len(set(ftypes)) 

658 self._known_consolidated = True 

659 

    @property
    def is_mixed_type(self):
        # Warning, consolidation needs to get checked upstairs
        # After consolidation, more than one block implies mixed dtypes.
        self._consolidate_inplace()
        return len(self.blocks) > 1

665 

    @property
    def is_numeric_mixed_type(self):
        # Warning, consolidation needs to get checked upstairs
        # True when every block holds numeric data.
        self._consolidate_inplace()
        return all(block.is_numeric for block in self.blocks)

671 

    @property
    def is_datelike_mixed_type(self):
        # Warning, consolidation needs to get checked upstairs
        # True when at least one block holds datelike data.
        self._consolidate_inplace()
        return any(block.is_datelike for block in self.blocks)

677 

    @property
    def any_extension_types(self):
        """Whether any of the blocks in this manager are extension blocks"""
        # No consolidation needed: extension-ness is a static block property.
        return any(block.is_extension for block in self.blocks)

682 

    @property
    def is_view(self):
        """ return a boolean if we are a single block and are a view """
        if len(self.blocks) == 1:
            return self.blocks[0].is_view

        # It is technically possible to figure out which blocks are views
        # e.g. [ b.values.base is not None for b in self.blocks ]
        # but then we have the case of possibly some blocks being a view
        # and some blocks not. setting in theory is possible on the non-view
        # blocks w/o causing a SettingWithCopy raise/warn. But this is a bit
        # complicated

        # Conservatively report False for multi-block managers.
        return False

697 

    def get_bool_data(self, copy=False):
        """
        Parameters
        ----------
        copy : boolean, default False
            Whether to copy the blocks
        """
        self._consolidate_inplace()
        # Keep only boolean blocks, re-homed into a new manager.
        return self.combine([b for b in self.blocks if b.is_bool], copy)

707 

    def get_numeric_data(self, copy=False):
        """
        Parameters
        ----------
        copy : boolean, default False
            Whether to copy the blocks
        """
        self._consolidate_inplace()
        # Keep only numeric blocks, re-homed into a new manager.
        return self.combine([b for b in self.blocks if b.is_numeric], copy)

717 

    def combine(self, blocks, copy=True):
        """ return a new manager with the blocks """
        if len(blocks) == 0:
            return self.make_empty()

        # FIXME: optimization potential
        # Sorted union of the item positions covered by the kept blocks.
        indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks]))
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        new_blocks = []
        for b in blocks:
            b = b.copy(deep=copy)
            # Remap each block's placement into the reduced item space.
            b.mgr_locs = algos.take_1d(
                inv_indexer, b.mgr_locs.as_array, axis=0, allow_fill=False
            )
            new_blocks.append(b)

        axes = list(self.axes)
        axes[0] = self.items.take(indexer)

        return type(self)(new_blocks, axes, do_integrity_check=False)

739 

740 def get_slice(self, slobj: slice, axis: int = 0): 

741 if axis >= self.ndim: 

742 raise IndexError("Requested axis not found in manager") 

743 

744 if axis == 0: 

745 new_blocks = self._slice_take_blocks_ax0(slobj) 

746 else: 

747 _slicer = [slice(None)] * (axis + 1) 

748 _slicer[axis] = slobj 

749 slicer = tuple(_slicer) 

750 new_blocks = [blk.getitem_block(slicer) for blk in self.blocks] 

751 

752 new_axes = list(self.axes) 

753 new_axes[axis] = new_axes[axis][slobj] 

754 

755 bm = type(self)(new_blocks, new_axes, do_integrity_check=False) 

756 bm._consolidate_inplace() 

757 return bm 

758 

    def __contains__(self, item) -> bool:
        # Membership is tested against the item (column) labels.
        return item in self.items

761 

    @property
    def nblocks(self) -> int:
        # Number of Blocks currently held by this manager.
        return len(self.blocks)

765 

766 def copy(self, deep=True): 

767 """ 

768 Make deep or shallow copy of BlockManager 

769 

770 Parameters 

771 ---------- 

772 deep : bool or string, default True 

773 If False, return shallow copy (do not copy data) 

774 If 'all', copy data and a deep copy of the index 

775 

776 Returns 

777 ------- 

778 BlockManager 

779 """ 

780 # this preserves the notion of view copying of axes 

781 if deep: 

782 # hit in e.g. tests.io.json.test_pandas 

783 

784 def copy_func(ax): 

785 if deep == "all": 

786 return ax.copy(deep=True) 

787 else: 

788 return ax.view() 

789 

790 new_axes = [copy_func(ax) for ax in self.axes] 

791 else: 

792 new_axes = list(self.axes) 

793 

794 res = self.apply("copy", deep=deep) 

795 res.axes = new_axes 

796 return res 

797 

798 def as_array(self, transpose=False, items=None): 

799 """Convert the blockmanager data into an numpy array. 

800 

801 Parameters 

802 ---------- 

803 transpose : boolean, default False 

804 If True, transpose the return array 

805 items : list of strings or None 

806 Names of block items that will be included in the returned 

807 array. ``None`` means that all block items will be used 

808 

809 Returns 

810 ------- 

811 arr : ndarray 

812 """ 

813 if len(self.blocks) == 0: 

814 arr = np.empty(self.shape, dtype=float) 

815 return arr.transpose() if transpose else arr 

816 

817 if items is not None: 

818 mgr = self.reindex_axis(items, axis=0) 

819 else: 

820 mgr = self 

821 

822 if self._is_single_block and mgr.blocks[0].is_datetimetz: 

823 # TODO(Block.get_values): Make DatetimeTZBlock.get_values 

824 # always be object dtype. Some callers seem to want the 

825 # DatetimeArray (previously DTI) 

826 arr = mgr.blocks[0].get_values(dtype=object) 

827 elif self._is_single_block or not self.is_mixed_type: 

828 arr = np.asarray(mgr.blocks[0].get_values()) 

829 else: 

830 arr = mgr._interleave() 

831 

832 return arr.transpose() if transpose else arr 

833 

    def _interleave(self):
        """
        Return ndarray from blocks with specified item order
        Items must be contained in the blocks
        """
        dtype = _interleaved_dtype(self.blocks)

        # TODO: https://github.com/pandas-dev/pandas/issues/22791
        # Give EAs some input on what happens here. Sparse needs this.
        if is_sparse(dtype):
            dtype = dtype.subtype
        elif is_extension_array_dtype(dtype):
            dtype = "object"

        result = np.empty(self.shape, dtype=dtype)

        # Tracks which item rows have been filled by some block.
        itemmask = np.zeros(self.shape[0])

        for blk in self.blocks:
            rl = blk.mgr_locs
            result[rl.indexer] = blk.get_values(dtype)
            itemmask[rl.indexer] = 1

        if not itemmask.all():
            raise AssertionError("Some items were not contained in blocks")

        return result

861 

862 def to_dict(self, copy=True): 

863 """ 

864 Return a dict of str(dtype) -> BlockManager 

865 

866 Parameters 

867 ---------- 

868 copy : boolean, default True 

869 

870 Returns 

871 ------- 

872 values : a dict of dtype -> BlockManager 

873 

874 Notes 

875 ----- 

876 This consolidates based on str(dtype) 

877 """ 

878 self._consolidate_inplace() 

879 

880 bd = {} 

881 for b in self.blocks: 

882 bd.setdefault(str(b.dtype), []).append(b) 

883 

884 return {dtype: self.combine(blocks, copy=copy) for dtype, blocks in bd.items()} 

885 

    def fast_xs(self, loc):
        """
        get a cross sectional for a given location in the
        items ; handle dups

        return the result, is *could* be a view in the case of a
        single block
        """
        if len(self.blocks) == 1:
            return self.blocks[0].iget((slice(None), loc))

        items = self.items

        # non-unique (GH4726)
        if not items.is_unique:
            # Fall back to full interleave to keep duplicate labels aligned.
            result = self._interleave()
            if self.ndim == 2:
                result = result.T
            return result[loc]

        # unique
        dtype = _interleaved_dtype(self.blocks)

        n = len(items)
        if is_extension_array_dtype(dtype):
            # we'll eventually construct an ExtensionArray.
            result = np.empty(n, dtype=object)
        else:
            result = np.empty(n, dtype=dtype)

        for blk in self.blocks:
            # Such assignment may incorrectly coerce NaT to None
            # result[blk.mgr_locs] = blk._slice((slice(None), loc))
            for i, rl in enumerate(blk.mgr_locs):
                result[rl] = blk.iget((i, loc))

        if is_extension_array_dtype(dtype):
            result = dtype.construct_array_type()._from_sequence(result, dtype=dtype)

        return result

926 

    def consolidate(self):
        """
        Join together blocks having same dtype

        Returns
        -------
        y : BlockManager
        """
        if self.is_consolidated():
            return self

        # Return a new manager; the actual merge happens in
        # _consolidate_inplace on the copy.
        bm = type(self)(self.blocks, self.axes)
        bm._is_consolidated = False
        bm._consolidate_inplace()
        return bm

942 

    def _consolidate_inplace(self):
        # Merge same-dtype blocks and refresh the item->block maps.
        if not self.is_consolidated():
            self.blocks = tuple(_consolidate(self.blocks))
            self._is_consolidated = True
            self._known_consolidated = True
            self._rebuild_blknos_and_blklocs()

949 

    def get(self, item):
        """
        Return values for selected item (ndarray or BlockManager).
        """
        if self.items.is_unique:

            if not isna(item):
                loc = self.items.get_loc(item)
            else:
                # NaN label: find positions where the item labels are missing.
                indexer = np.arange(len(self.items))[isna(self.items)]

                # allow a single nan location indexer
                if not is_scalar(indexer):
                    if len(indexer) == 1:
                        loc = indexer.item()
                    else:
                        raise ValueError("cannot label index with a null key")

            return self.iget(loc)
        else:

            if isna(item):
                raise TypeError("cannot label index with a null key")

            # Non-unique items: return every matching column via reindexing.
            indexer = self.items.get_indexer_for([item])
            return self.reindex_indexer(
                new_axis=self.items[indexer], indexer=indexer, axis=0, allow_dups=True
            )

978 

    def iget(self, i):
        """
        Return the data as a SingleBlockManager if possible

        Otherwise return as a ndarray
        """
        # Locate the owning block and the item's position inside it.
        block = self.blocks[self._blknos[i]]
        values = block.iget(self._blklocs[i])

        # shortcut for select a single-dim from a 2-dim BM
        return SingleBlockManager(
            [
                block.make_block_same_class(
                    values, placement=slice(0, len(values)), ndim=1
                )
            ],
            self.axes[1],
        )

997 

    def delete(self, item):
        """
        Delete selected item (items if non-unique) in-place.
        """
        indexer = self.items.get_loc(item)

        is_deleted = np.zeros(self.shape[0], dtype=np.bool_)
        is_deleted[indexer] = True
        # Offset by which each surviving item shifts left after deletion.
        ref_loc_offset = -is_deleted.cumsum()

        is_blk_deleted = [False] * len(self.blocks)

        if isinstance(indexer, int):
            affected_start = indexer
        else:
            affected_start = is_deleted.nonzero()[0][0]

        # Only blocks holding items at/after the first deletion are affected.
        for blkno, _ in _fast_count_smallints(self._blknos[affected_start:]):
            blk = self.blocks[blkno]
            bml = blk.mgr_locs
            blk_del = is_deleted[bml.indexer].nonzero()[0]

            if len(blk_del) == len(bml):
                # Every item in this block is deleted; drop the whole block.
                is_blk_deleted[blkno] = True
                continue
            elif len(blk_del) != 0:
                blk.delete(blk_del)
                bml = blk.mgr_locs

            # Shift remaining placements left past the deleted items.
            blk.mgr_locs = bml.add(ref_loc_offset[bml.indexer])

        # FIXME: use Index.delete as soon as it uses fastpath=True
        self.axes[0] = self.items[~is_deleted]
        self.blocks = tuple(
            b for blkno, b in enumerate(self.blocks) if not is_blk_deleted[blkno]
        )
        self._shape = None
        self._rebuild_blknos_and_blklocs()

1036 

    def set(self, item, value):
        """
        Set new item in-place. Does not consolidate. Adds new Block if not
        contained in the current set of items.

        Parameters
        ----------
        item : hashable
            Label in ``self.items`` to set; appended at the end if absent.
        value : np.ndarray or ExtensionArray
            New values; trailing dimensions must match the manager's shape.
        """
        # FIXME: refactor, clearly separate broadcasting & zip-like assignment
        # can prob also fix the various if tests for sparse/categorical

        value_is_extension_type = is_extension_array_dtype(value)

        # categorical/sparse/datetimetz
        if value_is_extension_type:
            # EA values are broadcast as-is to every target location.
            def value_getitem(placement):
                return value

        else:
            if value.ndim == self.ndim - 1:
                # Single row: reshape and broadcast it to all target locations.
                value = _safe_reshape(value, (1,) + value.shape)

                def value_getitem(placement):
                    return value

            else:
                # Zip-like: each target location takes its own slice of value.
                def value_getitem(placement):
                    return value[placement.indexer]

                if value.shape[1:] != self.shape[1:]:
                    raise AssertionError(
                        "Shape of new values must be compatible with manager shape"
                    )

        try:
            loc = self.items.get_loc(item)
        except KeyError:
            # This item wasn't present, just insert at end
            self.insert(len(self.items), item, value)
            return

        if isinstance(loc, int):
            loc = [loc]

        # Map target item positions to (block number, location within block).
        blknos = self._blknos[loc]
        blklocs = self._blklocs[loc].copy()

        unfit_mgr_locs = []
        unfit_val_locs = []
        removed_blknos = []
        for blkno, val_locs in libinternals.get_blkno_placements(blknos, group=True):
            blk = self.blocks[blkno]
            blk_locs = blklocs[val_locs.indexer]
            if blk.should_store(value):
                # Dtype-compatible: write directly into the existing block.
                blk.set(blk_locs, value_getitem(val_locs))
            else:
                # Dtype mismatch ("unfit"): remember the locations so new
                # blocks can be created for them below.
                unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs])
                unfit_val_locs.append(val_locs)

                # If all block items are unfit, schedule the block for removal.
                if len(val_locs) == len(blk.mgr_locs):
                    removed_blknos.append(blkno)
                else:
                    # Invalidate, shrink the block, then renumber its blklocs.
                    self._blklocs[blk.mgr_locs.indexer] = -1
                    blk.delete(blk_locs)
                    self._blklocs[blk.mgr_locs.indexer] = np.arange(len(blk))

        if len(removed_blknos):
            # Remove blocks & update blknos accordingly
            is_deleted = np.zeros(self.nblocks, dtype=np.bool_)
            is_deleted[removed_blknos] = True

            new_blknos = np.empty(self.nblocks, dtype=np.int64)
            new_blknos.fill(-1)
            new_blknos[~is_deleted] = np.arange(self.nblocks - len(removed_blknos))
            self._blknos = algos.take_1d(
                new_blknos, self._blknos, axis=0, allow_fill=False
            )
            self.blocks = tuple(
                blk for i, blk in enumerate(self.blocks) if i not in set(removed_blknos)
            )

        if unfit_val_locs:
            unfit_mgr_locs = np.concatenate(unfit_mgr_locs)
            unfit_count = len(unfit_mgr_locs)

            new_blocks = []
            if value_is_extension_type:
                # This code (ab-)uses the fact that sparse blocks contain only
                # one item.
                new_blocks.extend(
                    make_block(
                        values=value.copy(),
                        ndim=self.ndim,
                        placement=slice(mgr_loc, mgr_loc + 1),
                    )
                    for mgr_loc in unfit_mgr_locs
                )

                self._blknos[unfit_mgr_locs] = np.arange(unfit_count) + len(self.blocks)
                self._blklocs[unfit_mgr_locs] = 0

            else:
                # unfit_val_locs contains BlockPlacement objects
                unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:])

                new_blocks.append(
                    make_block(
                        values=value_getitem(unfit_val_items),
                        ndim=self.ndim,
                        placement=unfit_mgr_locs,
                    )
                )

                self._blknos[unfit_mgr_locs] = len(self.blocks)
                self._blklocs[unfit_mgr_locs] = np.arange(unfit_count)

            self.blocks += tuple(new_blocks)

            # Newly created block's dtype may already be present.
            self._known_consolidated = False

1157 

    def insert(self, loc: int, item, value, allow_duplicates: bool = False):
        """
        Insert item at selected position.

        Parameters
        ----------
        loc : int
        item : hashable
        value : array_like
        allow_duplicates: bool
            If False, trying to insert non-unique item will raise

        """
        if not allow_duplicates and item in self.items:
            # Should this be a different kind of error??
            raise ValueError(f"cannot insert {item}, already exists")

        if not isinstance(loc, int):
            raise TypeError("loc must be int")

        # insert to the axis; this could possibly raise a TypeError
        new_axis = self.items.insert(loc, item)

        block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))

        # Shift mgr_locs of every block at or past the insertion point.
        for blkno, count in _fast_count_smallints(self._blknos[loc:]):
            blk = self.blocks[blkno]
            if count == len(blk.mgr_locs):
                # Entire block is past the insertion point: shift wholesale.
                blk.mgr_locs = blk.mgr_locs.add(1)
            else:
                # Only some of the block's items shift; bump those >= loc.
                new_mgr_locs = blk.mgr_locs.as_array.copy()
                new_mgr_locs[new_mgr_locs >= loc] += 1
                blk.mgr_locs = new_mgr_locs

        if loc == self._blklocs.shape[0]:
            # np.append is a lot faster, let's use it if we can.
            self._blklocs = np.append(self._blklocs, 0)
            self._blknos = np.append(self._blknos, len(self.blocks))
        else:
            self._blklocs = np.insert(self._blklocs, loc, 0)
            self._blknos = np.insert(self._blknos, loc, len(self.blocks))

        self.axes[0] = new_axis
        self.blocks += (block,)
        self._shape = None

        self._known_consolidated = False

        # Many single-column inserts degrade performance; consolidate
        # once the block count grows large.
        if len(self.blocks) > 100:
            self._consolidate_inplace()

1208 

1209 def reindex_axis( 

1210 self, new_index, axis, method=None, limit=None, fill_value=None, copy=True 

1211 ): 

1212 """ 

1213 Conform block manager to new index. 

1214 """ 

1215 new_index = ensure_index(new_index) 

1216 new_index, indexer = self.axes[axis].reindex( 

1217 new_index, method=method, limit=limit 

1218 ) 

1219 

1220 return self.reindex_indexer( 

1221 new_index, indexer, axis=axis, fill_value=fill_value, copy=copy 

1222 ) 

1223 

    def reindex_indexer(
        self, new_axis, indexer, axis, fill_value=None, allow_dups=False, copy=True
    ):
        """
        Reindex along ``axis`` using a precomputed indexer.

        Parameters
        ----------
        new_axis : Index
        indexer : ndarray of int64 or None
        axis : int
        fill_value : object
        allow_dups : bool
        copy : bool, default True

        pandas-indexer with -1's only.
        """
        if indexer is None:
            # No reindexing needed; only the axis labels change.
            if new_axis is self.axes[axis] and not copy:
                return self

            result = self.copy(deep=copy)
            result.axes = list(self.axes)
            result.axes[axis] = new_axis
            return result

        self._consolidate_inplace()

        # some axes don't allow reindexing with dups
        if not allow_dups:
            self.axes[axis]._can_reindex(indexer)

        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        if axis == 0:
            # Items axis: may split/merge blocks, handled by dedicated helper.
            new_blocks = self._slice_take_blocks_ax0(indexer, fill_tuple=(fill_value,))
        else:
            # Other axes: take within each block independently.
            new_blocks = [
                blk.take_nd(
                    indexer,
                    axis=axis,
                    fill_tuple=(
                        fill_value if fill_value is not None else blk.fill_value,
                    ),
                )
                for blk in self.blocks
            ]

        new_axes = list(self.axes)
        new_axes[axis] = new_axis
        return type(self)(new_blocks, new_axes)

1273 

    def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None):
        """
        Slice/take blocks along axis=0.

        Overloaded for SingleBlock

        Parameters
        ----------
        slice_or_indexer : slice, boolean ndarray, or integer indexer
        fill_tuple : tuple or None
            When not None, -1 entries in the indexer are filled with
            ``fill_tuple[0]`` via newly created NA blocks.

        Returns
        -------
        new_blocks : list of Block
        """

        allow_fill = fill_tuple is not None

        sl_type, slobj, sllen = _preprocess_slice_or_indexer(
            slice_or_indexer, self.shape[0], allow_fill=allow_fill
        )

        if self._is_single_block:
            blk = self.blocks[0]

            if sl_type in ("slice", "mask"):
                # Views are cheap for slices/masks on a single block.
                return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
            elif not allow_fill or self.ndim == 1:
                if allow_fill and fill_tuple[0] is None:
                    # Promote dtype so it can hold the NA fill value.
                    _, fill_value = maybe_promote(blk.dtype)
                    fill_tuple = (fill_value,)

                return [
                    blk.take_nd(
                        slobj,
                        axis=0,
                        new_mgr_locs=slice(0, sllen),
                        fill_tuple=fill_tuple,
                    )
                ]

        if sl_type in ("slice", "mask"):
            blknos = self._blknos[slobj]
            blklocs = self._blklocs[slobj]
        else:
            blknos = algos.take_1d(
                self._blknos, slobj, fill_value=-1, allow_fill=allow_fill
            )
            blklocs = algos.take_1d(
                self._blklocs, slobj, fill_value=-1, allow_fill=allow_fill
            )

        # When filling blknos, make sure blknos is updated before appending to
        # blocks list, that way new blkno is exactly len(blocks).
        #
        # FIXME: mgr_groupby_blknos must return mgr_locs in ascending order,
        # pytables serialization will break otherwise.
        blocks = []
        for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=True):
            if blkno == -1:
                # If we've got here, fill_tuple was not None.
                fill_value = fill_tuple[0]

                blocks.append(
                    self._make_na_block(placement=mgr_locs, fill_value=fill_value)
                )
            else:
                blk = self.blocks[blkno]

                # Otherwise, slicing along items axis is necessary.
                if not blk._can_consolidate:
                    # A non-consolidatable block, it's easy, because there's
                    # only one item and each mgr loc is a copy of that single
                    # item.
                    for mgr_loc in mgr_locs:
                        newblk = blk.copy(deep=True)
                        newblk.mgr_locs = slice(mgr_loc, mgr_loc + 1)
                        blocks.append(newblk)

                else:
                    blocks.append(
                        blk.take_nd(
                            blklocs[mgr_locs.indexer],
                            axis=0,
                            new_mgr_locs=mgr_locs,
                            fill_tuple=None,
                        )
                    )

        return blocks

1359 

1360 def _make_na_block(self, placement, fill_value=None): 

1361 # TODO: infer dtypes other than float64 from fill_value 

1362 

1363 if fill_value is None: 

1364 fill_value = np.nan 

1365 block_shape = list(self.shape) 

1366 block_shape[0] = len(placement) 

1367 

1368 dtype, fill_value = infer_dtype_from_scalar(fill_value) 

1369 block_values = np.empty(block_shape, dtype=dtype) 

1370 block_values.fill(fill_value) 

1371 return make_block(block_values, placement=placement) 

1372 

    def take(self, indexer, axis=1, verify=True, convert=True):
        """
        Take items along any axis.

        Parameters
        ----------
        indexer : slice or array-like of int
        axis : int, default 1
        verify : bool, default True
            If True, raise when any resulting index is -1 or out of bounds.
        convert : bool, default True
            If True, convert negative indices to their positional equivalents.
        """
        self._consolidate_inplace()
        # Normalize slice input into an explicit int64 indexer.
        indexer = (
            np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64")
            if isinstance(indexer, slice)
            else np.asanyarray(indexer, dtype="int64")
        )

        n = self.shape[axis]
        if convert:
            indexer = maybe_convert_indices(indexer, n)

        if verify:
            if ((indexer == -1) | (indexer >= n)).any():
                raise Exception("Indices must be nonzero and less than the axis length")

        new_labels = self.axes[axis].take(indexer)
        # allow_dups=True: taking may legitimately repeat labels.
        return self.reindex_indexer(
            new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
        )

1396 

1397 def equals(self, other): 

1398 self_axes, other_axes = self.axes, other.axes 

1399 if len(self_axes) != len(other_axes): 

1400 return False 

1401 if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)): 

1402 return False 

1403 self._consolidate_inplace() 

1404 other._consolidate_inplace() 

1405 if len(self.blocks) != len(other.blocks): 

1406 return False 

1407 

1408 # canonicalize block order, using a tuple combining the mgr_locs 

1409 # then type name because there might be unconsolidated 

1410 # blocks (say, Categorical) which can only be distinguished by 

1411 # the iteration order 

1412 def canonicalize(block): 

1413 return (block.mgr_locs.as_array.tolist(), block.dtype.name) 

1414 

1415 self_blocks = sorted(self.blocks, key=canonicalize) 

1416 other_blocks = sorted(other.blocks, key=canonicalize) 

1417 return all( 

1418 block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks) 

1419 ) 

1420 

    def unstack(self, unstacker_func, fill_value):
        """Return a blockmanager with all blocks unstacked.

        Parameters
        ----------
        unstacker_func : callable
            A (partially-applied) ``pd.core.reshape._Unstacker`` class.
        fill_value : Any
            fill_value for newly introduced missing values.

        Returns
        -------
        unstacked : BlockManager
        """
        n_rows = self.shape[-1]
        # Dummy unstacker over empty data, used only to derive the
        # resulting column and row indexes.
        dummy = unstacker_func(np.empty((0, 0)), value_columns=self.items)
        new_columns = dummy.get_new_columns()
        new_index = dummy.get_new_index()
        new_blocks = []
        columns_mask = []

        for blk in self.blocks:
            # Each block unstacks its own columns; mask records which of
            # new_columns this block actually produced.
            blocks, mask = blk._unstack(
                partial(unstacker_func, value_columns=self.items[blk.mgr_locs.indexer]),
                new_columns,
                n_rows,
                fill_value,
            )

            new_blocks.extend(blocks)
            columns_mask.extend(mask)

        new_columns = new_columns[columns_mask]

        bm = BlockManager(new_blocks, [new_columns, new_index])
        return bm

1457 

1458 

class SingleBlockManager(BlockManager):
    """ manage a single block with a single axis (used to back Series) """

    ndim = 1
    # A single block is consolidated by definition.
    _is_consolidated = True
    _known_consolidated = True
    __slots__ = ()

    def __init__(
        self,
        block: Block,
        axis: Union[Index, List[Index]],
        do_integrity_check: bool = False,  # accepted for signature compat; not used here
        fastpath: bool = False,
    ):
        if isinstance(axis, list):
            if len(axis) != 1:
                raise ValueError(
                    "cannot create SingleBlockManager with more than 1 axis"
                )
            axis = axis[0]

        # passed from constructor, single block, single axis
        if fastpath:
            # Trust the caller: skip ensure_index and consolidation.
            self.axes = [axis]
            if isinstance(block, list):

                # empty block
                if len(block) == 0:
                    block = [np.array([])]
                elif len(block) != 1:
                    raise ValueError(
                        "Cannot create SingleBlockManager with more than 1 block"
                    )
                block = block[0]
        else:
            self.axes = [ensure_index(axis)]

            # create the block here
            if isinstance(block, list):

                # provide consolidation to the interleaved_dtype
                if len(block) > 1:
                    dtype = _interleaved_dtype(block)
                    block = [b.astype(dtype) for b in block]
                    block = _consolidate(block)

                if len(block) != 1:
                    raise ValueError(
                        "Cannot create SingleBlockManager with more than 1 block"
                    )
                block = block[0]

        if not isinstance(block, Block):
            block = make_block(block, placement=slice(0, len(axis)), ndim=1)

        self.blocks = tuple([block])

    def _post_setstate(self):
        # Nothing to rebuild after unpickling for a single block.
        pass

    @property
    def _block(self):
        # The sole block backing this manager.
        return self.blocks[0]

    @property
    def _values(self):
        return self._block.values

    @property
    def _blknos(self):
        """ compat with BlockManager """
        return None

    @property
    def _blklocs(self):
        """ compat with BlockManager """
        return None

    def get_slice(self, slobj, axis=0):
        """Return a new manager over a slice of the (single) axis."""
        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True,)

    @property
    def index(self):
        return self.axes[0]

    @property
    def dtype(self):
        return self._block.dtype

    @property
    def array_dtype(self):
        return self._block.array_dtype

    def get_dtype_counts(self):
        return {self.dtype.name: 1}

    def get_dtypes(self):
        return np.array([self._block.dtype])

    def external_values(self):
        return self._block.external_values()

    def internal_values(self):
        return self._block.internal_values()

    def get_values(self):
        """ return a dense type view """
        return np.array(self._block.to_dense(), copy=False)

    @property
    def _can_hold_na(self):
        return self._block._can_hold_na

    def is_consolidated(self):
        # Always True: there is only one block.
        return True

    def _consolidate_check(self):
        pass

    def _consolidate_inplace(self):
        pass

    def delete(self, item):
        """
        Delete single item from SingleBlockManager.

        Ensures that self.blocks doesn't become empty.
        """
        loc = self.items.get_loc(item)
        self._block.delete(loc)
        self.axes[0] = self.axes[0].delete(loc)

    def fast_xs(self, loc):
        """
        fast path for getting a cross-section
        return a view of the data
        """
        return self._block.values[loc]

    def concat(self, to_concat, new_axis):
        """
        Concatenate a list of SingleBlockManagers into a single
        SingleBlockManager.

        Used for pd.concat of Series objects with axis=0.

        Parameters
        ----------
        to_concat : list of SingleBlockManagers
        new_axis : Index of the result

        Returns
        -------
        SingleBlockManager

        """
        non_empties = [x for x in to_concat if len(x) > 0]

        # check if all series are of the same block type:
        if len(non_empties) > 0:
            blocks = [obj.blocks[0] for obj in non_empties]
            if len({b.dtype for b in blocks}) == 1:
                # Homogeneous dtype: fast same-type concatenation.
                new_block = blocks[0].concat_same_type(blocks)
            else:
                values = [x.values for x in blocks]
                values = concat_compat(values)
                new_block = make_block(values, placement=slice(0, len(values), 1))
        else:
            # All inputs empty: concatenate values anyway to get the
            # right resulting dtype.
            values = [x._block.values for x in to_concat]
            values = concat_compat(values)
            new_block = make_block(values, placement=slice(0, len(values), 1))

        mgr = SingleBlockManager(new_block, new_axis)
        return mgr

1637 

1638 

1639# -------------------------------------------------------------------- 

1640# Constructor Helpers 

1641 

1642 

def create_block_manager_from_blocks(blocks, axes):
    """
    Build a consolidated BlockManager from ``blocks``.

    Parameters
    ----------
    blocks : list of Block, or a single-element list holding a raw array
        A raw array is wrapped in a Block whose placement covers all items.
    axes : list of Index

    Returns
    -------
    BlockManager

    Raises
    ------
    ValueError
        If the blocks' shapes are inconsistent with ``axes``.
    """
    try:
        if len(blocks) == 1 and not isinstance(blocks[0], Block):
            # if blocks[0] is of length 0, return empty blocks
            if not len(blocks[0]):
                blocks = []
            else:
                # It's OK if a single block is passed as values, its placement
                # is basically "all items", but if there're many, don't bother
                # converting, it's an error anyway.
                blocks = [
                    make_block(values=blocks[0], placement=slice(0, len(axes[0])))
                ]

        mgr = BlockManager(blocks, axes)
        mgr._consolidate_inplace()
        return mgr

    except ValueError as e:
        blocks = [getattr(b, "values", b) for b in blocks]
        tot_items = sum(b.shape[0] for b in blocks)
        # BUGFIX: with zero blocks, blocks[0] would raise IndexError and
        # mask the informative construction error below.
        block_shape = blocks[0].shape[1:] if blocks else (0,)
        construction_error(tot_items, block_shape, axes, e)

1665 

1666 

def create_block_manager_from_arrays(arrays, names, axes):
    """Build a consolidated BlockManager from per-column arrays."""
    try:
        # Group arrays into typed blocks, then consolidate.
        mgr = BlockManager(form_blocks(arrays, names, axes), axes)
        mgr._consolidate_inplace()
        return mgr
    except ValueError as e:
        construction_error(len(arrays), arrays[0].shape, axes, e)

1676 

1677 

def construction_error(tot_items, block_shape, axes, e=None):
    """Raise a helpful error describing a failed manager construction.

    Parameters
    ----------
    tot_items : int
        Number of items (columns) passed.
    block_shape : tuple
        Shape of the block data beyond the items axis.
    e : Exception, optional
        Original exception; re-raised when the shapes actually agree.
    """
    passed = tuple(int(x) for x in [tot_items] + list(block_shape))
    # Present 1D/2D shapes in user-facing (rows, columns) order.
    if len(passed) <= 2:
        passed = tuple(reversed(passed))

    implied = tuple(len(ax) for ax in axes)
    if len(implied) <= 2:
        implied = tuple(reversed(implied))

    # Shapes agree, so the original exception is the real story.
    if e is not None and passed == implied:
        raise e
    if block_shape[0] == 0:
        raise ValueError("Empty data passed with indices specified.")
    raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")

1695 

1696 

1697# ----------------------------------------------------------------------- 

1698 

1699 

def form_blocks(arrays, names, axes):
    """
    Group ``arrays`` by inferred block type and build the Blocks for a new
    BlockManager.

    Parameters
    ----------
    arrays : sequence of array-like, one per item
    names : sequence of labels matching ``arrays``
    axes : list of Index; axes[0] is the items axis

    Returns
    -------
    list of Block
    """
    # put "leftover" items in float bucket, where else?
    # generalize?
    items_dict = defaultdict(list)
    extra_locs = []

    names_idx = ensure_index(names)
    if names_idx.equals(axes[0]):
        names_indexer = np.arange(len(names_idx))
    else:
        assert names_idx.intersection(axes[0]).is_unique
        names_indexer = names_idx.get_indexer_for(axes[0])

    for i, name_idx in enumerate(names_indexer):
        if name_idx == -1:
            # Item in axes[0] has no corresponding array: fill with NA later.
            extra_locs.append(i)
            continue

        k = names[name_idx]
        v = arrays[name_idx]

        # Bucket each array by the Block subclass that would hold it.
        block_type = get_block_type(v)
        items_dict[block_type.__name__].append((i, k, v))

    blocks = []
    if len(items_dict["FloatBlock"]):
        float_blocks = _multi_blockify(items_dict["FloatBlock"])
        blocks.extend(float_blocks)

    if len(items_dict["ComplexBlock"]):
        complex_blocks = _multi_blockify(items_dict["ComplexBlock"])
        blocks.extend(complex_blocks)

    if len(items_dict["TimeDeltaBlock"]):
        timedelta_blocks = _multi_blockify(items_dict["TimeDeltaBlock"])
        blocks.extend(timedelta_blocks)

    if len(items_dict["IntBlock"]):
        int_blocks = _multi_blockify(items_dict["IntBlock"])
        blocks.extend(int_blocks)

    if len(items_dict["DatetimeBlock"]):
        datetime_blocks = _simple_blockify(items_dict["DatetimeBlock"], _NS_DTYPE)
        blocks.extend(datetime_blocks)

    if len(items_dict["DatetimeTZBlock"]):
        # tz-aware data cannot be consolidated: one block per item.
        dttz_blocks = [
            make_block(array, klass=DatetimeTZBlock, placement=[i])
            for i, _, array in items_dict["DatetimeTZBlock"]
        ]
        blocks.extend(dttz_blocks)

    if len(items_dict["BoolBlock"]):
        bool_blocks = _simple_blockify(items_dict["BoolBlock"], np.bool_)
        blocks.extend(bool_blocks)

    if len(items_dict["ObjectBlock"]) > 0:
        object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_)
        blocks.extend(object_blocks)

    if len(items_dict["CategoricalBlock"]) > 0:
        # Categoricals are not consolidatable: one block per item.
        cat_blocks = [
            make_block(array, klass=CategoricalBlock, placement=[i])
            for i, _, array in items_dict["CategoricalBlock"]
        ]
        blocks.extend(cat_blocks)

    if len(items_dict["ExtensionBlock"]):

        external_blocks = [
            make_block(array, klass=ExtensionBlock, placement=[i])
            for i, _, array in items_dict["ExtensionBlock"]
        ]

        blocks.extend(external_blocks)

    if len(items_dict["ObjectValuesExtensionBlock"]):
        external_blocks = [
            make_block(array, klass=ObjectValuesExtensionBlock, placement=[i])
            for i, _, array in items_dict["ObjectValuesExtensionBlock"]
        ]

        blocks.extend(external_blocks)

    if len(extra_locs):
        shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:])

        # empty items -> dtype object
        block_values = np.empty(shape, dtype=object)
        block_values.fill(np.nan)

        na_block = make_block(block_values, placement=extra_locs)
        blocks.append(na_block)

    return blocks

1795 

1796 

def _simple_blockify(tuples, dtype):
    """Stack ``(pos, name, array)`` tuples of one dtype into a single Block.

    If ``dtype`` is not None, the stacked values are coerced to it.
    """
    values, placement = _stack_arrays(tuples, dtype)

    # TODO: CHECK DTYPE?
    if dtype is not None and values.dtype != dtype:  # pragma: no cover
        values = values.astype(dtype)

    return [make_block(values, placement=placement)]

1809 

1810 

def _multi_blockify(tuples, dtype=None):
    """ return an array of blocks that potentially have different dtypes """
    new_blocks = []
    # Group consecutive tuples by their array's dtype and stack each group.
    for grp_dtype, tup_block in itertools.groupby(tuples, lambda x: x[2].dtype):
        values, placement = _stack_arrays(list(tup_block), grp_dtype)
        new_blocks.append(make_block(values, placement=placement))

    return new_blocks

1826 

1827 

1828def _stack_arrays(tuples, dtype): 

1829 

1830 # fml 

1831 def _asarray_compat(x): 

1832 if isinstance(x, ABCSeries): 

1833 return x._values 

1834 else: 

1835 return np.asarray(x) 

1836 

1837 def _shape_compat(x): 

1838 if isinstance(x, ABCSeries): 

1839 return (len(x),) 

1840 else: 

1841 return x.shape 

1842 

1843 placement, names, arrays = zip(*tuples) 

1844 

1845 first = arrays[0] 

1846 shape = (len(arrays),) + _shape_compat(first) 

1847 

1848 stacked = np.empty(shape, dtype=dtype) 

1849 for i, arr in enumerate(arrays): 

1850 stacked[i] = _asarray_compat(arr) 

1851 

1852 return stacked, placement 

1853 

1854 

1855def _interleaved_dtype( 

1856 blocks: List[Block], 

1857) -> Optional[Union[np.dtype, ExtensionDtype]]: 

1858 """Find the common dtype for `blocks`. 

1859 

1860 Parameters 

1861 ---------- 

1862 blocks : List[Block] 

1863 

1864 Returns 

1865 ------- 

1866 dtype : Optional[Union[np.dtype, ExtensionDtype]] 

1867 None is returned when `blocks` is empty. 

1868 """ 

1869 if not len(blocks): 

1870 return None 

1871 

1872 return find_common_type([b.dtype for b in blocks]) 

1873 

1874 

def _consolidate(blocks):
    """
    Merge blocks having same dtype, exclude non-consolidating blocks
    """

    # Sort by (_can_consolidate, dtype) so groupby sees mergeable blocks
    # adjacently.
    def gkey(block):
        return block._consolidate_key

    new_blocks = []
    for (can_consolidate, dtype), grouped in itertools.groupby(
        sorted(blocks, key=gkey), gkey
    ):
        merged = _merge_blocks(
            list(grouped), dtype=dtype, _can_consolidate=can_consolidate
        )
        new_blocks = _extend_blocks(merged, new_blocks)
    return new_blocks

1891 

1892 

def _compare_or_regex_search(a, b, regex=False):
    """
    Compare two array_like inputs of the same shape or two scalar values

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array_like or scalar
    b : array_like or scalar
    regex : bool, default False

    Returns
    -------
    mask : array_like of bool

    Raises
    ------
    TypeError
        If the comparison collapsed to a scalar while an array was passed,
        i.e. the operand types cannot be compared elementwise.
    """
    if not regex:
        op = lambda x: operator.eq(x, b)
    else:
        # Non-string elements can never regex-match: treat them as False.
        op = np.vectorize(
            lambda x: bool(re.search(b, x)) if isinstance(x, str) else False
        )

    is_a_array = isinstance(a, np.ndarray)
    is_b_array = isinstance(b, np.ndarray)

    if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        result = False
    else:
        result = op(a)

    if is_scalar(result) and (is_a_array or is_b_array):
        # Elementwise comparison degenerated to a scalar: the dtypes are
        # incomparable, so report a helpful error instead of a bogus mask.
        type_names = [type(a).__name__, type(b).__name__]

        if is_a_array:
            type_names[0] = f"ndarray(dtype={a.dtype})"

        if is_b_array:
            type_names[1] = f"ndarray(dtype={b.dtype})"

        raise TypeError(
            f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
        )
    return result

1939 

1940 

1941def _transform_index(index, func, level=None): 

1942 """ 

1943 Apply function to all values found in index. 

1944 

1945 This includes transforming multiindex entries separately. 

1946 Only apply function to one level of the MultiIndex if level is specified. 

1947 

1948 """ 

1949 if isinstance(index, MultiIndex): 

1950 if level is not None: 

1951 items = [ 

1952 tuple(func(y) if i == level else y for i, y in enumerate(x)) 

1953 for x in index 

1954 ] 

1955 else: 

1956 items = [tuple(func(y) for y in x) for x in index] 

1957 return MultiIndex.from_tuples(items, names=index.names) 

1958 else: 

1959 items = [func(x) for x in index] 

1960 return Index(items, name=index.name, tupleize_cols=False) 

1961 

1962 

1963def _fast_count_smallints(arr): 

1964 """Faster version of set(arr) for sequences of small numbers.""" 

1965 counts = np.bincount(arr.astype(np.int_)) 

1966 nz = counts.nonzero()[0] 

1967 return np.c_[nz, counts[nz]] 

1968 

1969 

1970def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill): 

1971 if isinstance(slice_or_indexer, slice): 

1972 return ( 

1973 "slice", 

1974 slice_or_indexer, 

1975 libinternals.slice_len(slice_or_indexer, length), 

1976 ) 

1977 elif ( 

1978 isinstance(slice_or_indexer, np.ndarray) and slice_or_indexer.dtype == np.bool_ 

1979 ): 

1980 return "mask", slice_or_indexer, slice_or_indexer.sum() 

1981 else: 

1982 indexer = np.asanyarray(slice_or_indexer, dtype=np.int64) 

1983 if not allow_fill: 

1984 indexer = maybe_convert_indices(indexer, length) 

1985 return "fancy", indexer, len(indexer) 

1986 

1987 

def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy):
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    concat_plans = [
        get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers
    ]
    concat_plan = combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:

        if len(join_units) == 1 and not join_units[0].indexers:
            # Single unit needing no reindexing: reuse its values directly.
            b = join_units[0].block
            values = b.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            b = b.make_block_same_class(values, placement=placement)
        elif is_uniform_join_units(join_units):
            # All units share a block type: fast same-type concatenation.
            b = join_units[0].block.concat_same_type(
                [ju.block for ju in join_units], placement=placement
            )
        else:
            # Mixed types: concatenate with dtype coercion.
            b = make_block(
                concatenate_join_units(join_units, concat_axis, copy=copy),
                placement=placement,
            )
        blocks.append(b)

    return BlockManager(blocks, axes)