Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Collection of utilities to manipulate structured arrays. 

3 

4Most of these functions were initially implemented by John Hunter for 

5matplotlib. They have been rewritten and extended for convenience. 

6 

7""" 

8import itertools 

9import numpy as np 

10import numpy.ma as ma 

11from numpy import ndarray, recarray 

12from numpy.ma import MaskedArray 

13from numpy.ma.mrecords import MaskedRecords 

14from numpy.core.overrides import array_function_dispatch 

15from numpy.lib._iotools import _is_string_like 

16from numpy.testing import suppress_warnings 

17 

# Alias of the private numpy.ma helper that normalizes a user-supplied
# fill value against a given dtype (used by `merge_arrays` below).
_check_fill_value = np.ma.core._check_fill_value

19 

20 

# Public API of numpy.lib.recfunctions (some names are defined further
# down in the file, beyond this excerpt).
__all__ = [
    'append_fields', 'apply_along_fields', 'assign_fields_by_name',
    'drop_fields', 'find_duplicates', 'flatten_descr',
    'get_fieldstructure', 'get_names', 'get_names_flat',
    'join_by', 'merge_arrays', 'rec_append_fields',
    'rec_drop_fields', 'rec_join', 'recursive_fill_fields',
    'rename_fields', 'repack_fields', 'require_fields',
    'stack_arrays', 'structured_to_unstructured', 'unstructured_to_structured',
    ]

30 

31 

def _recursive_fill_fields_dispatcher(input, output):
    # __array_function__ dispatcher: both arguments are relevant arrays.
    return (input, output)

34 

35 

@array_function_dispatch(_recursive_fill_fields_dispatcher)
def recursive_fill_fields(input, output):
    """
    Fills fields from output with fields from input,
    with support for nested structures.

    Parameters
    ----------
    input : ndarray
        Input array.
    output : ndarray
        Output array.

    Notes
    -----
    * `output` should be at least the same size as `input`

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', np.int64), ('B', np.float64)])
    >>> b = np.zeros((3,), dtype=a.dtype)
    >>> rfn.recursive_fill_fields(a, b)
    array([(1, 10.), (2, 20.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])

    """
    for name in output.dtype.names:
        try:
            values = input[name]
        except ValueError:
            # `input` has no field of that name: leave `output` untouched.
            continue
        if values.dtype.names is not None:
            # Structured field: descend and fill sub-field by sub-field.
            recursive_fill_fields(values, output[name])
        else:
            # Plain field: copy over the leading elements only.
            output[name][:len(values)] = values
    return output

73 

74 

def _get_fieldspec(dtype):
    """
    Produce a list of name/dtype pairs corresponding to the dtype fields

    Similar to dtype.descr, but the second item of each tuple is a dtype, not a
    string. As a result, this handles subarray dtypes

    Can be passed to the dtype constructor to reconstruct the dtype, noting that
    this (deliberately) discards field offsets.

    Examples
    --------
    >>> dt = np.dtype([(('a', 'A'), np.int64), ('b', np.double, 3)])
    >>> dt.descr
    [(('a', 'A'), '<i8'), ('b', '<f8', (3,))]
    >>> _get_fieldspec(dt)
    [(('a', 'A'), dtype('int64')), ('b', dtype(('<f8', (3,))))]

    """
    if dtype.names is None:
        # .descr returns a nameless field for unstructured dtypes; mirror that.
        return [('', dtype)]
    spec = []
    for name in dtype.names:
        field = dtype.fields[name]
        # `field` is (dtype, offset[, title]); keep the title when present.
        key = (field[2], name) if len(field) == 3 else name
        spec.append((key, field[0]))
    return spec

104 

105 

def get_names(adtype):
    """
    Returns the field names of the input datatype as a tuple. Nested
    structured fields appear as ``(name, subnames)`` pairs.

    Parameters
    ----------
    adtype : dtype
        Input datatype

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
    >>> rfn.get_names(adtype)
    ('a', ('b', ('ba', 'bb')))

    Raises ``AttributeError`` when called with an object that has no
    ``names`` attribute (e.g. an ndarray instead of a dtype).
    """
    # Plain fields contribute their name; structured fields contribute a
    # (name, nested-names) pair, built recursively.
    return tuple(
        name if adtype[name].names is None
        else (name, tuple(get_names(adtype[name])))
        for name in adtype.names
    )

140 

141 

def get_names_flat(adtype):
    """
    Returns the field names of the input datatype as a tuple. Nested structure
    are flattened beforehand.

    Parameters
    ----------
    adtype : dtype
        Input datatype

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
    >>> rfn.get_names_flat(adtype)
    ('a', 'b', 'ba', 'bb')
    """
    flat = []
    for name in adtype.names:
        # A structured field contributes its own name *and* all nested names.
        flat.append(name)
        subdtype = adtype[name]
        if subdtype.names is not None:
            flat += get_names_flat(subdtype)
    return tuple(flat)

175 

176 

def flatten_descr(ndtype):
    """
    Flatten a structured data-type description.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> ndtype = np.dtype([('a', '<i4'), ('b', [('ba', '<f8'), ('bb', '<i4')])])
    >>> rfn.flatten_descr(ndtype)
    (('a', dtype('int32')), ('ba', dtype('float64')), ('bb', dtype('int32')))

    """
    if ndtype.names is None:
        # Unstructured dtype: a single anonymous entry.
        return (('', ndtype),)
    descr = []
    for name in ndtype.names:
        typ = ndtype.fields[name][0]
        if typ.names is None:
            descr.append((name, typ))
        else:
            # Nested structure: splice in its flattened leaves.
            descr += flatten_descr(typ)
    return tuple(descr)

201 

202 

def _zip_dtype(seqarrays, flatten=False):
    """
    Combine the dtypes of a sequence of arrays into one structured dtype.
    """
    parts = []
    if flatten:
        # Collapse any nested structure into a flat list of leaves.
        for arr in seqarrays:
            parts.extend(flatten_descr(arr.dtype))
    else:
        for arr in seqarrays:
            dt = arr.dtype
            if dt.names is not None and len(dt.names) == 1:
                # special case - dtypes of 1 field are flattened
                parts.extend(_get_fieldspec(dt))
            else:
                # Anonymous entry; np.dtype will auto-name it 'fN'.
                parts.append(('', dt))
    return np.dtype(parts)

217 

218 

def _zip_descr(seqarrays, flatten=False):
    """
    Combine the dtype description of a series of arrays.

    Parameters
    ----------
    seqarrays : sequence of arrays
        Sequence of arrays
    flatten : {boolean}, optional
        Whether to collapse nested descriptions.

    Returns
    -------
    list
        The string-based ``.descr`` form of the combined dtype.
    """
    # Delegate dtype combination to _zip_dtype; only the textual descr differs.
    return _zip_dtype(seqarrays, flatten=flatten).descr

231 

232 

def get_fieldstructure(adtype, lastname=None, parents=None,):
    """
    Returns a dictionary with fields indexing lists of their parent fields.

    This function is used to simplify access to fields nested in other fields.

    Parameters
    ----------
    adtype : np.dtype
        Input datatype
    lastname : optional
        Last processed field name (used internally during recursion).
    parents : dictionary
        Dictionary of parent fields (used internally during recursion).

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> ndtype = np.dtype([('A', int),
    ...                    ('B', [('BA', int),
    ...                           ('BB', [('BBA', int), ('BBB', int)])])])
    >>> rfn.get_fieldstructure(ndtype)
    ... # XXX: possible regression, order of BBA and BBB is swapped
    {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'], 'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']}

    """
    if parents is None:
        parents = {}
    for name in adtype.names:
        current = adtype[name]
        if current.names is not None:
            # Structured field: record its own parent (if any), then recurse
            # into it with `name` as the new enclosing field.
            parents[name] = [lastname] if lastname else []
            parents.update(get_fieldstructure(current, name, parents))
        else:
            # Leaf field: its ancestry is the container's ancestry plus the
            # container itself.
            lineage = list(parents.get(lastname) or [])
            if lineage:
                lineage.append(lastname)
            elif lastname:
                lineage = [lastname]
            parents[name] = lineage
    return parents

278 

279 

def _izip_fields_flat(iterable):
    """
    Returns an iterator of concatenated fields from a sequence of arrays,
    collapsing any nested structure.

    """
    for item in iterable:
        if isinstance(item, np.void):
            # Structured scalar: recurse into its fields and yield the leaves.
            yield from _izip_fields_flat(tuple(item))
        else:
            yield item

291 

292 

def _izip_fields(iterable):
    """
    Returns an iterator of concatenated fields from a sequence of arrays.

    """
    for item in iterable:
        # Non-string iterables are flattened recursively.
        is_nested = hasattr(item, '__iter__') and not isinstance(item, str)
        if is_nested:
            yield from _izip_fields(item)
        elif isinstance(item, np.void) and len(tuple(item)) == 1:
            # A one-field structured scalar is unwrapped the same way.
            yield from _izip_fields(item)
        else:
            yield item

307 

308 

def _izip_records(seqarrays, fill_value=None, flatten=True):
    """
    Returns an iterator of concatenated items from a sequence of arrays.

    Parameters
    ----------
    seqarrays : sequence of arrays
        Sequence of arrays.
    fill_value : {None, integer}
        Value used to pad shorter iterables.
    flatten : {True, False},
        Whether to collapse nested fields while zipping.
    """
    # Pick the field iterator: flat (collapse nesting) or nested.
    zipfunc = _izip_fields_flat if flatten else _izip_fields
    for row in itertools.zip_longest(*seqarrays, fillvalue=fill_value):
        yield tuple(zipfunc(row))

331 

332 

def _fix_output(output, usemask=True, asrecarray=False):
    """
    Private function: return a recarray, a ndarray, a MaskedArray
    or a MaskedRecords depending on the input parameters
    """
    # A masked result only makes sense when the input actually carries a mask.
    if usemask and isinstance(output, MaskedArray):
        if asrecarray:
            output = output.view(MaskedRecords)
    else:
        # Collapse any mask (a no-op for plain ndarrays) ...
        output = ma.filled(output)
        if asrecarray:
            # ... and expose fields as attributes when requested.
            output = output.view(recarray)
    return output

348 

349 

def _fix_defaults(output, defaults=None):
    """
    Update the fill_value and masked data of `output`
    from the default given in a dictionary defaults.
    """
    names = output.dtype.names
    # Capture the underlying buffers once, then patch field by field.
    data, mask, fill_value = output.data, output.mask, output.fill_value
    for field, default in (defaults or {}).items():
        if field in names:
            fill_value[field] = default
            # Overwrite only the masked entries of that field.
            data[field][mask[field]] = default
    return output

362 

363 

def _merge_arrays_dispatcher(seqarrays, fill_value=None, flatten=None,
                             usemask=None, asrecarray=None):
    # __array_function__ dispatcher: only the input arrays are relevant.
    return seqarrays

367 

368 

@array_function_dispatch(_merge_arrays_dispatcher)
def merge_arrays(seqarrays, fill_value=-1, flatten=False,
                 usemask=False, asrecarray=False):
    """
    Merge arrays field by field.

    Parameters
    ----------
    seqarrays : sequence of ndarrays
        Sequence of arrays
    fill_value : {float}, optional
        Filling value used to pad missing data on the shorter arrays.
    flatten : {False, True}, optional
        Whether to collapse nested fields.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray (MaskedRecords) or not.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])))
    array([( 1, 10.), ( 2, 20.), (-1, 30.)],
          dtype=[('f0', '<i8'), ('f1', '<f8')])

    >>> rfn.merge_arrays((np.array([1, 2], dtype=np.int64),
    ...         np.array([10., 20., 30.])), usemask=False)
    array([(1, 10.0), (2, 20.0), (-1, 30.0)],
          dtype=[('f0', '<i8'), ('f1', '<f8')])
    >>> rfn.merge_arrays((np.array([1, 2]).view([('a', np.int64)]),
    ...               np.array([10., 20., 30.])),
    ...              usemask=False, asrecarray=True)
    rec.array([( 1, 10.), ( 2, 20.), (-1, 30.)],
              dtype=[('a', '<i8'), ('f1', '<f8')])

    Notes
    -----
    * Without a mask, the missing value will be filled with something,
      depending on what its corresponding type:

      * ``-1``      for integers
      * ``-1.0``    for floating point numbers
      * ``'-'``     for characters
      * ``'-1'``    for strings
      * ``True``    for boolean values
    * XXX: I just obtained these values empirically
    """
    # Only one item in the input sequence ?
    if (len(seqarrays) == 1):
        seqarrays = np.asanyarray(seqarrays[0])
    # Do we have a single ndarray as input ?
    if isinstance(seqarrays, (ndarray, np.void)):
        seqdtype = seqarrays.dtype
        # Make sure we have named fields
        if seqdtype.names is None:
            seqdtype = np.dtype([('', seqdtype)])
        if not flatten or _zip_dtype((seqarrays,), flatten=True) == seqdtype:
            # Minimal processing needed: just make sure everything's a-ok
            seqarrays = seqarrays.ravel()
            # Find what type of array we must return
            if usemask:
                if asrecarray:
                    seqtype = MaskedRecords
                else:
                    seqtype = MaskedArray
            elif asrecarray:
                seqtype = recarray
            else:
                seqtype = ndarray
            return seqarrays.view(dtype=seqdtype, type=seqtype)
        else:
            # Flattening requested and needed: fall through to the
            # general machinery below with a one-element sequence.
            seqarrays = (seqarrays,)
    else:
        # Make sure we have arrays in the input sequence
        seqarrays = [np.asanyarray(_m) for _m in seqarrays]
    # Find the sizes of the inputs and their maximum
    sizes = tuple(a.size for a in seqarrays)
    maxlength = max(sizes)
    # Get the dtype of the output (flattening if needed)
    newdtype = _zip_dtype(seqarrays, flatten=flatten)
    # Initialize the sequences for data and mask
    seqdata = []
    seqmask = []
    # If we expect some kind of MaskedArray, make a special loop.
    if usemask:
        for (a, n) in zip(seqarrays, sizes):
            nbmissing = (maxlength - n)
            # Get the data and mask
            data = a.ravel().__array__()
            mask = ma.getmaskarray(a).ravel()
            # Get the filling value (if needed)
            if nbmissing:
                fval = _check_fill_value(fill_value, a.dtype)
                if isinstance(fval, (ndarray, np.void)):
                    if len(fval.dtype) == 1:
                        # Single-field fill value: use the scalar inside it.
                        fval = fval.item()[0]
                        fmsk = True
                    else:
                        # Multi-field fill value: keep it as a 1-element array
                        # and mask every field of the padded entries.
                        fval = np.array(fval, dtype=a.dtype, ndmin=1)
                        fmsk = np.ones((1,), dtype=mask.dtype)
            else:
                # No padding needed for this input.
                fval = None
                fmsk = True
            # Store an iterator padding the input to the expected length
            seqdata.append(itertools.chain(data, [fval] * nbmissing))
            seqmask.append(itertools.chain(mask, [fmsk] * nbmissing))
        # Create an iterator for the data
        data = tuple(_izip_records(seqdata, flatten=flatten))
        output = ma.array(np.fromiter(data, dtype=newdtype, count=maxlength),
                          mask=list(_izip_records(seqmask, flatten=flatten)))
        if asrecarray:
            output = output.view(MaskedRecords)
    else:
        # Same as before, without the mask we don't need...
        for (a, n) in zip(seqarrays, sizes):
            nbmissing = (maxlength - n)
            data = a.ravel().__array__()
            if nbmissing:
                fval = _check_fill_value(fill_value, a.dtype)
                if isinstance(fval, (ndarray, np.void)):
                    if len(fval.dtype) == 1:
                        fval = fval.item()[0]
                    else:
                        fval = np.array(fval, dtype=a.dtype, ndmin=1)
            else:
                fval = None
            seqdata.append(itertools.chain(data, [fval] * nbmissing))
        output = np.fromiter(tuple(_izip_records(seqdata, flatten=flatten)),
                             dtype=newdtype, count=maxlength)
        if asrecarray:
            output = output.view(recarray)
    # And we're done...
    return output

503 

504 

def _drop_fields_dispatcher(base, drop_names, usemask=None, asrecarray=None):
    # __array_function__ dispatcher: only `base` is a relevant array argument.
    return (base,)

507 

508 

@array_function_dispatch(_drop_fields_dispatcher)
def drop_fields(base, drop_names, usemask=True, asrecarray=False):
    """
    Return a new array with fields in `drop_names` dropped.

    Nested fields are supported.

    .. versionchanged:: 1.18.0
        `drop_fields` returns an array with 0 fields if all fields are
        dropped, rather than returning ``None`` as it did previously.

    Parameters
    ----------
    base : array
        Input array
    drop_names : string or sequence
        String or sequence of strings corresponding to the names of the
        fields to drop.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : string or sequence, optional
        Whether to return a recarray or a mrecarray (`asrecarray=True`) or
        a plain ndarray or masked array with flexible dtype. The default
        is False.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
    ...   dtype=[('a', np.int64), ('b', [('ba', np.double), ('bb', np.int64)])])
    >>> rfn.drop_fields(a, 'a')
    array([((2., 3),), ((5., 6),)],
          dtype=[('b', [('ba', '<f8'), ('bb', '<i8')])])
    >>> rfn.drop_fields(a, 'ba')
    array([(1, (3,)), (4, (6,))], dtype=[('a', '<i8'), ('b', [('bb', '<i8')])])
    >>> rfn.drop_fields(a, ['ba', 'bb'])
    array([(1,), (4,)], dtype=[('a', '<i8')])
    """
    if _is_string_like(drop_names):
        drop_names = [drop_names]
    else:
        drop_names = set(drop_names)

    def _filtered_descr(ndtype):
        # Rebuild the dtype description, skipping dropped names at any depth.
        kept = []
        for name in ndtype.names:
            if name in drop_names:
                continue
            current = ndtype[name]
            if current.names is not None:
                sub = _filtered_descr(current)
                # Drop the container entirely when nothing inside survives.
                if sub:
                    kept.append((name, sub))
            else:
                kept.append((name, current))
        return kept

    output = np.empty(base.shape, dtype=_filtered_descr(base.dtype))
    output = recursive_fill_fields(base, output)
    return _fix_output(output, usemask=usemask, asrecarray=asrecarray)

572 

573 

def _keep_fields(base, keep_names, usemask=True, asrecarray=False):
    """
    Return a new array keeping only the fields in `keep_names`,
    and preserving the order of those fields.

    Parameters
    ----------
    base : array
        Input array
    keep_names : string or sequence
        String or sequence of strings corresponding to the names of the
        fields to keep. Order of the names will be preserved.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : string or sequence, optional
        Whether to return a recarray or a mrecarray (`asrecarray=True`) or
        a plain ndarray or masked array with flexible dtype. The default
        is False.
    """
    # Build the reduced dtype in the caller-requested order, then copy over.
    kept_dtype = [(name, base.dtype[name]) for name in keep_names]
    result = recursive_fill_fields(base, np.empty(base.shape, dtype=kept_dtype))
    return _fix_output(result, usemask=usemask, asrecarray=asrecarray)

597 

598 

def _rec_drop_fields_dispatcher(base, drop_names):
    # __array_function__ dispatcher: only `base` is a relevant array argument.
    return (base,)

601 

602 

@array_function_dispatch(_rec_drop_fields_dispatcher)
def rec_drop_fields(base, drop_names):
    """
    Returns a new numpy.recarray with fields in `drop_names` dropped.

    Parameters
    ----------
    base : array
        Input array.
    drop_names : string or sequence
        String or sequence of strings corresponding to the names of the
        fields to drop.
    """
    # Convenience wrapper around `drop_fields` forcing an unmasked recarray.
    return drop_fields(base, drop_names, usemask=False, asrecarray=True)

609 

610 

def _rename_fields_dispatcher(base, namemapper):
    # __array_function__ dispatcher: only `base` is a relevant array argument.
    return (base,)

613 

614 

@array_function_dispatch(_rename_fields_dispatcher)
def rename_fields(base, namemapper):
    """
    Rename the fields from a flexible-datatype ndarray or recarray.

    Nested fields are supported.

    Parameters
    ----------
    base : ndarray
        Input array whose fields must be modified.
    namemapper : dictionary
        Dictionary mapping old field names to their new version.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))],
    ...   dtype=[('a', int),('b', [('ba', float), ('bb', (float, 2))])])
    >>> rfn.rename_fields(a, {'a':'A', 'bb':'BB'})
    array([(1, (2., [ 3., 30.])), (4, (5., [ 6., 60.]))],
          dtype=[('A', '<i8'), ('b', [('ba', '<f8'), ('BB', '<f8', (2,))])])

    """
    def _remap(ndtype):
        # Rebuild the description, substituting names at every nesting level.
        remapped = []
        for name in ndtype.names:
            target = namemapper.get(name, name)
            current = ndtype[name]
            if current.names is not None:
                remapped.append((target, _remap(current)))
            else:
                remapped.append((target, current))
        return remapped

    # A view suffices: only the field names change, not the memory layout.
    return base.view(_remap(base.dtype))

653 

654 

def _append_fields_dispatcher(base, names, data, dtypes=None,
                              fill_value=None, usemask=None, asrecarray=None):
    # __array_function__ dispatcher: the base array plus every data array.
    yield base
    yield from data

659 

660 

@array_function_dispatch(_append_fields_dispatcher)
def append_fields(base, names, data, dtypes=None,
                  fill_value=-1, usemask=True, asrecarray=False):
    """
    Add new fields to an existing array.

    The names of the fields are given with the `names` arguments,
    the corresponding values with the `data` arguments.
    If a single field is appended, `names`, `data` and `dtypes` do not have
    to be lists but just values.

    Parameters
    ----------
    base : array
        Input array to extend.
    names : string, sequence
        String or sequence of strings corresponding to the names
        of the new fields.
    data : array or sequence of arrays
        Array or sequence of arrays storing the fields to add to the base.
    dtypes : sequence of datatypes, optional
        Datatype or sequence of datatypes.
        If None, the datatypes are estimated from the `data`.
    fill_value : {float}, optional
        Filling value used to pad missing data on the shorter arrays.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray (MaskedRecords) or not.

    Raises
    ------
    ValueError
        If `names` and `data` lengths differ, or `dtypes` has an
        incompatible length.
    """
    # Check the names
    if isinstance(names, (tuple, list)):
        if len(names) != len(data):
            msg = "The number of arrays does not match the number of names"
            raise ValueError(msg)
    elif isinstance(names, str):
        names = [names, ]
        data = [data, ]
    # Promote each data item to a (possibly subclassed) ndarray without
    # forcing a copy.  NOTE: the previous `np.array(a, copy=False, ...)`
    # raises ValueError on NumPy >= 2.0 whenever a copy is actually needed
    # (e.g. for list inputs); `asanyarray` keeps the old copy-if-needed,
    # subclass-passthrough semantics on all NumPy versions.
    if dtypes is None:
        data = [np.asanyarray(a) for a in data]
        data = [a.view([(name, a.dtype)]) for (name, a) in zip(names, data)]
    else:
        if not isinstance(dtypes, (tuple, list)):
            dtypes = [dtypes, ]
        if len(data) != len(dtypes):
            if len(dtypes) == 1:
                # Broadcast a single dtype over every data array.
                dtypes = dtypes * len(data)
            else:
                msg = "The dtypes argument must be None, a dtype, or a list."
                raise ValueError(msg)
        data = [np.asanyarray(a, dtype=d).view([(n, d)])
                for (a, n, d) in zip(data, names, dtypes)]
    # Normalize the base to a structured array, and merge the new fields
    # into a single structured array when there are several of them.
    base = merge_arrays(base, usemask=usemask, fill_value=fill_value)
    if len(data) > 1:
        data = merge_arrays(data, flatten=True, usemask=usemask,
                            fill_value=fill_value)
    else:
        data = data.pop()
    # Allocate a fully-masked output long enough for the longer input,
    # then fill it from both sides; unfilled tail entries stay masked.
    output = ma.masked_all(
        max(len(base), len(data)),
        dtype=_get_fieldspec(base.dtype) + _get_fieldspec(data.dtype))
    output = recursive_fill_fields(base, output)
    output = recursive_fill_fields(data, output)
    #
    return _fix_output(output, usemask=usemask, asrecarray=asrecarray)

730 

731 

def _rec_append_fields_dispatcher(base, names, data, dtypes=None):
    # __array_function__ dispatcher: the base array plus every data array.
    yield base
    yield from data

735 

736 

@array_function_dispatch(_rec_append_fields_dispatcher)
def rec_append_fields(base, names, data, dtypes=None):
    """
    Add new fields to an existing array.

    The names of the fields are given with the `names` arguments,
    the corresponding values with the `data` arguments.
    If a single field is appended, `names`, `data` and `dtypes` do not have
    to be lists but just values.

    Parameters
    ----------
    base : array
        Input array to extend.
    names : string, sequence
        String or sequence of strings corresponding to the names
        of the new fields.
    data : array or sequence of arrays
        Array or sequence of arrays storing the fields to add to the base.
    dtypes : sequence of datatypes, optional
        Datatype or sequence of datatypes.
        If None, the datatypes are estimated from the `data`.

    See Also
    --------
    append_fields

    Returns
    -------
    appended_array : np.recarray
    """
    # Convenience wrapper around `append_fields` forcing an unmasked recarray.
    return append_fields(base, names, data=data, dtypes=dtypes,
                         asrecarray=True, usemask=False)

770 

771 

def _repack_fields_dispatcher(a, align=None, recurse=None):
    # __array_function__ dispatcher: only `a` is a relevant array argument.
    return (a,)

774 

775 

@array_function_dispatch(_repack_fields_dispatcher)
def repack_fields(a, align=False, recurse=False):
    """
    Re-pack the fields of a structured array or dtype in memory.

    The memory layout of structured datatypes allows fields at arbitrary
    byte offsets. This means the fields can be separated by padding bytes,
    their offsets can be non-monotonically increasing, and they can overlap.

    This method removes any overlaps and reorders the fields in memory so they
    have increasing byte offsets, and adds or removes padding bytes depending
    on the `align` option, which behaves like the `align` option to `np.dtype`.

    If `align=False`, this method produces a "packed" memory layout in which
    each field starts at the byte the previous field ended, and any padding
    bytes are removed.

    If `align=True`, this methods produces an "aligned" memory layout in which
    each field's offset is a multiple of its alignment, and the total itemsize
    is a multiple of the largest alignment, by adding padding bytes as needed.

    Parameters
    ----------
    a : ndarray or dtype
        array or dtype for which to repack the fields.
    align : boolean
        If true, use an "aligned" memory layout, otherwise use a "packed" layout.
    recurse : boolean
        If True, also repack nested structures.

    Returns
    -------
    repacked : ndarray or dtype
        Copy of `a` with fields repacked, or `a` itself if no repacking was
        needed.

    Examples
    --------

    >>> from numpy.lib import recfunctions as rfn
    >>> dt = np.dtype('u1, <i8, <f8', align=True)
    >>> packed_dt = rfn.repack_fields(dt)
    >>> packed_dt
    dtype([('f0', 'u1'), ('f1', '<i8'), ('f2', '<f8')])

    """
    if not isinstance(a, np.dtype):
        # Array input: repack its dtype and cast (a no-op view when already
        # packed, since astype(copy=False) avoids needless copies).
        packed_dtype = repack_fields(a.dtype, align=align, recurse=recurse)
        return a.astype(packed_dtype, copy=False)

    if a.names is None:
        # Not a structured dtype: nothing to repack.
        return a

    fieldinfo = []
    for name in a.names:
        # Each fields entry is (dtype, offset[, title]).
        field_dtype, _offset, *title = a.fields[name]
        fmt = (repack_fields(field_dtype, align=align, recurse=True)
               if recurse else field_dtype)
        if title:
            # Preserve the field title alongside its name.
            name = (title[0], name)
        fieldinfo.append((name, fmt))

    # Rebuilding from (name, format) pairs discards the old offsets.
    packed = np.dtype(fieldinfo, align=align)
    return np.dtype((a.type, packed))

856 

def _get_fields_and_offsets(dt, offset=0):
    """
    Returns a flat list of (dtype, count, offset) tuples of all the
    scalar fields in the dtype "dt", including nested fields, in left
    to right order.
    """

    def _base_and_count(dt):
        # Strip (possibly nested) subarray shapes off a dtype, accumulating
        # the total element count, and return the scalar base dtype.
        count = 1
        while dt.shape != ():
            for dim in dt.shape:
                count *= dim
            dt = dt.base
        return dt, count

    fields = []
    for name in dt.names:
        f_dt, f_offset = dt.fields[name][:2]
        f_dt, n = _base_and_count(f_dt)

        if f_dt.names is None:
            # Scalar (or subarray of scalars): one flat entry.
            fields.append((np.dtype((f_dt, (n,))), n, f_offset + offset))
        else:
            # Nested structure: flatten it once, then replicate the entries
            # with shifted offsets for each subarray element.
            subfields = _get_fields_and_offsets(f_dt, f_offset + offset)
            size = f_dt.itemsize
            for i in range(n):
                if i == 0:
                    fields.extend(subfields)
                else:
                    fields.extend([(d, c, o + i * size)
                                   for d, c, o in subfields])
    return fields

893 

894 

def _structured_to_unstructured_dispatcher(arr, dtype=None, copy=None,
                                           casting=None):
    # __array_function__ dispatcher: only `arr` is a relevant array argument.
    return (arr,)

898 

@array_function_dispatch(_structured_to_unstructured_dispatcher)
def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
    """
    Converts an n-D structured array into an (n+1)-D unstructured array.

    The new array will have a new last dimension equal in size to the
    number of field-elements of the input array. If not supplied, the output
    datatype is determined from the numpy type promotion rules applied to all
    the field datatypes.

    Nested fields, as well as each element of any subarray fields, all count
    as a single field-elements.

    Parameters
    ----------
    arr : ndarray
        Structured array or dtype to convert. Cannot contain object datatype.
    dtype : dtype, optional
        The dtype of the output unstructured array.
    copy : bool, optional
        See copy argument to `ndarray.astype`. If true, always return a copy.
        If false, and `dtype` requirements are satisfied, a view is returned.
    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        See casting argument of `ndarray.astype`. Controls what kind of data
        casting may occur.

    Returns
    -------
    unstructured : ndarray
        Unstructured array with one more dimension.

    Examples
    --------

    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.zeros(4, dtype=[('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
    >>> rfn.structured_to_unstructured(a)
    array([[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.]])

    >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
    ...              dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
    >>> np.mean(rfn.structured_to_unstructured(b[['x', 'z']]), axis=-1)
    array([ 3. ,  5.5,  9. , 11. ])

    """
    if arr.dtype.names is None:
        raise ValueError('arr must be a structured array')

    fields = _get_fields_and_offsets(arr.dtype)
    n_fields = len(fields)
    if n_fields == 0 and dtype is None:
        raise ValueError("arr has no fields. Unable to guess dtype")
    elif n_fields == 0:
        # too many bugs elsewhere for this to work now
        raise NotImplementedError("arr with no fields is not supported")

    dts, counts, offsets = zip(*fields)
    names = ['f{}'.format(n) for n in range(n_fields)]

    if dtype is None:
        # Promote all field base dtypes to a single common output dtype.
        out_dtype = np.result_type(*[dt.base for dt in dts])
    else:
        out_dtype = dtype

    # Use a series of views and casts to convert to an unstructured array:

    # first view using flattened fields (doesn't work for object arrays)
    # Note: dts may include a shape for subarrays
    flattened_fields = np.dtype({'names': names,
                                 'formats': dts,
                                 'offsets': offsets,
                                 'itemsize': arr.dtype.itemsize})
    with suppress_warnings() as sup:  # until 1.16 (gh-12447)
        sup.filter(FutureWarning, "Numpy has detected")
        arr = arr.view(flattened_fields)

    # next cast to a packed format with all fields converted to new dtype
    packed_fields = np.dtype({'names': names,
                              'formats': [(out_dtype, dt.shape) for dt in dts]})
    arr = arr.astype(packed_fields, copy=copy, casting=casting)

    # finally is it safe to view the packed fields as the unstructured type
    return arr.view((out_dtype, (sum(counts),)))

989 

990 

991def _unstructured_to_structured_dispatcher(arr, dtype=None, names=None, 

992 align=None, copy=None, casting=None): 

993 return (arr,) 

994 

@array_function_dispatch(_unstructured_to_structured_dispatcher)
def unstructured_to_structured(arr, dtype=None, names=None, align=False,
                               copy=False, casting='unsafe'):
    """
    Converts an n-D unstructured array into an (n-1)-D structured array.

    The last dimension of the input array is converted into a structure, with
    number of field-elements equal to the size of the last dimension of the
    input array. By default all output fields have the input array's dtype, but
    an output structured dtype with an equal number of fields-elements can be
    supplied instead.

    Nested fields, as well as each element of any subarray fields, all count
    towards the number of field-elements.

    Parameters
    ----------
    arr : ndarray
        Unstructured array or dtype to convert.
    dtype : dtype, optional
        The structured dtype of the output array
    names : list of strings, optional
        If dtype is not supplied, this specifies the field names for the output
        dtype, in order. The field dtypes will be the same as the input array.
    align : boolean, optional
        Whether to create an aligned memory layout.
    copy : bool, optional
        See copy argument to `ndarray.astype`. If true, always return a copy.
        If false, and `dtype` requirements are satisfied, a view is returned.
    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        See casting argument of `ndarray.astype`. Controls what kind of data
        casting may occur.

    Returns
    -------
    structured : ndarray
        Structured array with fewer dimensions.

    Examples
    --------

    >>> from numpy.lib import recfunctions as rfn
    >>> dt = np.dtype([('a', 'i4'), ('b', 'f4,u2'), ('c', 'f4', 2)])
    >>> a = np.arange(20).reshape((4,5))
    >>> a
    array([[ 0,  1,  2,  3,  4],
           [ 5,  6,  7,  8,  9],
           [10, 11, 12, 13, 14],
           [15, 16, 17, 18, 19]])
    >>> rfn.unstructured_to_structured(a, dt)
    array([( 0, ( 1.,  2), [ 3.,  4.]), ( 5, ( 6.,  7), [ 8.,  9.]),
           (10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])],
          dtype=[('a', '<i4'), ('b', [('f0', '<f4'), ('f1', '<u2')]), ('c', '<f4', (2,))])

    """
    if arr.shape == ():
        raise ValueError('arr must have at least one dimension')
    n_elem = arr.shape[-1]
    if n_elem == 0:
        # too many bugs elsewhere for this to work now
        raise NotImplementedError("last axis with size 0 is not supported")

    if dtype is None:
        # No target dtype: build a flat structured dtype with one field per
        # element of the last axis, all with the input array's dtype.
        if names is None:
            names = ['f{}'.format(n) for n in range(n_elem)]
        out_dtype = np.dtype([(n, arr.dtype) for n in names], align=align)
        fields = _get_fields_and_offsets(out_dtype)
        dts, counts, offsets = zip(*fields)
    else:
        if names is not None:
            raise ValueError("don't supply both dtype and names")
        # sanity check of the input dtype
        fields = _get_fields_and_offsets(dtype)
        if len(fields) == 0:
            dts, counts, offsets = [], [], []
        else:
            dts, counts, offsets = zip(*fields)

        if n_elem != sum(counts):
            raise ValueError('The length of the last dimension of arr must '
                             'be equal to the number of fields in dtype')
        out_dtype = dtype
        if align and not out_dtype.isalignedstruct:
            raise ValueError("align was True but dtype is not aligned")

    # Generated placeholder names for the intermediate flattened dtypes.
    names = ['f{}'.format(n) for n in range(len(fields))]

    # Use a series of views and casts to convert to a structured array:

    # first view as a packed structured array of one dtype
    packed_fields = np.dtype({'names': names,
                              'formats': [(arr.dtype, dt.shape) for dt in dts]})
    arr = np.ascontiguousarray(arr).view(packed_fields)

    # next cast to an unpacked but flattened format with varied dtypes
    flattened_fields = np.dtype({'names': names,
                                 'formats': dts,
                                 'offsets': offsets,
                                 'itemsize': out_dtype.itemsize})
    arr = arr.astype(flattened_fields, copy=copy, casting=casting)

    # finally view as the final nested dtype and remove the last axis
    return arr.view(out_dtype)[..., 0]

1098 

1099def _apply_along_fields_dispatcher(func, arr): 

1100 return (arr,) 

1101 

@array_function_dispatch(_apply_along_fields_dispatcher)
def apply_along_fields(func, arr):
    """
    Apply function 'func' as a reduction across fields of a structured array.

    This is similar to `apply_along_axis`, but treats the fields of a
    structured array as an extra axis. The fields are all first cast to a
    common type following the type-promotion rules from `numpy.result_type`
    applied to the field's dtypes.

    Parameters
    ----------
    func : function
        Function to apply on the "field" dimension. This function must
        support an `axis` argument, like np.mean, np.sum, etc.
    arr : ndarray
        Structured array for which to apply func.

    Returns
    -------
    out : ndarray
        Result of the reduction operation

    Examples
    --------

    >>> from numpy.lib import recfunctions as rfn
    >>> b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)],
    ...              dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
    >>> rfn.apply_along_fields(np.mean, b)
    array([ 2.66666667,  5.33333333,  8.66666667, 11.        ])
    >>> rfn.apply_along_fields(np.mean, b[['x', 'z']])
    array([ 3. ,  5.5,  9. , 11. ])

    """
    if arr.dtype.names is None:
        raise ValueError('arr must be a structured array')

    # Turn the fields into a trailing axis (promoted to a common dtype),
    # then let `func` reduce over that axis.
    unstructured = structured_to_unstructured(arr)
    return func(unstructured, axis=-1)

1144 

1145def _assign_fields_by_name_dispatcher(dst, src, zero_unassigned=None): 

1146 return dst, src 

1147 

@array_function_dispatch(_assign_fields_by_name_dispatcher)
def assign_fields_by_name(dst, src, zero_unassigned=True):
    """
    Assigns values from one structured array to another by field name.

    Normally in numpy >= 1.14, assignment of one structured array to another
    copies fields "by position", meaning that the first field from the src is
    copied to the first field of the dst, and so on, regardless of field name.

    This function instead copies "by field name", such that fields in the dst
    are assigned from the identically named field in the src. This applies
    recursively for nested structures. This is how structure assignment worked
    in numpy >= 1.6 to <= 1.13.

    Parameters
    ----------
    dst : ndarray
    src : ndarray
        The source and destination arrays during assignment.
    zero_unassigned : bool, optional
        If True, fields in the dst for which there was no matching
        field in the src are filled with the value 0 (zero). This
        was the behavior of numpy <= 1.13. If False, those fields
        are not modified.
    """
    dst_names = dst.dtype.names
    if dst_names is None:
        # Unstructured destination: a plain element-wise assignment suffices.
        dst[...] = src
        return

    for name in dst_names:
        if name in src.dtype.names:
            # Recurse so nested structured fields are also matched by name.
            assign_fields_by_name(dst[name], src[name], zero_unassigned)
        elif zero_unassigned:
            dst[name] = 0

1185 

1186def _require_fields_dispatcher(array, required_dtype): 

1187 return (array,) 

1188 

@array_function_dispatch(_require_fields_dispatcher)
def require_fields(array, required_dtype):
    """
    Casts a structured array to a new dtype using assignment by field-name.

    This function assigns from the old to the new array by name, so the
    value of a field in the output array is the value of the field with the
    same name in the source array. This has the effect of creating a new
    ndarray containing only the fields "required" by the required_dtype.

    If a field name in the required_dtype does not exist in the
    input array, that field is created and set to 0 in the output array.

    Parameters
    ----------
    a : ndarray
        array to cast
    required_dtype : dtype
        datatype for output array

    Returns
    -------
    out : ndarray
        array with the new dtype, with field values copied from the fields in
        the input array with the same name

    Examples
    --------

    >>> from numpy.lib import recfunctions as rfn
    >>> a = np.ones(4, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
    >>> rfn.require_fields(a, [('b', 'f4'), ('c', 'u1')])
    array([(1., 1), (1., 1), (1., 1), (1., 1)],
      dtype=[('b', '<f4'), ('c', 'u1')])
    >>> rfn.require_fields(a, [('b', 'f4'), ('newf', 'u1')])
    array([(1., 0), (1., 0), (1., 0), (1., 0)],
      dtype=[('b', '<f4'), ('newf', 'u1')])

    """
    # Allocate the target layout, then populate it field-by-field; fields
    # absent from the input are zero-filled by assign_fields_by_name.
    result = np.empty(array.shape, dtype=required_dtype)
    assign_fields_by_name(result, array)
    return result

1231 

1232 

1233def _stack_arrays_dispatcher(arrays, defaults=None, usemask=None, 

1234 asrecarray=None, autoconvert=None): 

1235 return arrays 

1236 

1237 

@array_function_dispatch(_stack_arrays_dispatcher)
def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
                 autoconvert=False):
    """
    Superposes arrays fields by fields

    Parameters
    ----------
    arrays : array or sequence
        Sequence of input arrays.
    defaults : dictionary, optional
        Dictionary mapping field names to the corresponding default values.
    usemask : {True, False}, optional
        Whether to return a MaskedArray (or MaskedRecords is
        `asrecarray==True`) or a ndarray.
    asrecarray : {False, True}, optional
        Whether to return a recarray (or MaskedRecords if `usemask==True`)
        or just a flexible-type ndarray.
    autoconvert : {False, True}, optional
        Whether automatically cast the type of the field to the maximum.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> x = np.array([1, 2,])
    >>> rfn.stack_arrays(x) is x
    True
    >>> z = np.array([('A', 1), ('B', 2)], dtype=[('A', '|S3'), ('B', float)])
    >>> zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)],
    ...               dtype=[('A', '|S3'), ('B', np.double), ('C', np.double)])
    >>> test = rfn.stack_arrays((z,zz))
    >>> test
    masked_array(data=[(b'A', 1.0, --), (b'B', 2.0, --), (b'a', 10.0, 100.0),
                       (b'b', 20.0, 200.0), (b'c', 30.0, 300.0)],
                 mask=[(False, False,  True), (False, False,  True),
                       (False, False, False), (False, False, False),
                       (False, False, False)],
           fill_value=(b'N/A', 1.e+20, 1.e+20),
                dtype=[('A', 'S3'), ('B', '<f8'), ('C', '<f8')])

    """
    # A single ndarray needs no stacking; a length-1 sequence is likewise
    # returned as-is.
    if isinstance(arrays, ndarray):
        return arrays
    elif len(arrays) == 1:
        return arrays[0]
    seqarrays = [np.asanyarray(a).ravel() for a in arrays]
    nrecords = [len(a) for a in seqarrays]
    ndtype = [a.dtype for a in seqarrays]
    fldnames = [d.names for d in ndtype]
    # Build the union of all field descriptions, keeping first-seen order.
    dtype_l = ndtype[0]
    newdescr = _get_fieldspec(dtype_l)
    names = [n for n, d in newdescr]
    for dtype_n in ndtype[1:]:
        for fname, fdtype in _get_fieldspec(dtype_n):
            if fname not in names:
                newdescr.append((fname, fdtype))
                names.append(fname)
            else:
                # Field already present: either widen it (autoconvert) or
                # insist the dtypes match exactly.
                nameidx = names.index(fname)
                _, cdtype = newdescr[nameidx]
                if autoconvert:
                    newdescr[nameidx] = (fname, max(fdtype, cdtype))
                elif fdtype != cdtype:
                    raise TypeError("Incompatible type '%s' <> '%s'" %
                                    (cdtype, fdtype))
    # Only one field: use concatenate
    if len(newdescr) == 1:
        output = ma.concatenate(seqarrays)
    else:
        # Fill a fully-masked output block by block; entries with no
        # corresponding source field stay masked.
        output = ma.masked_all((np.sum(nrecords),), newdescr)
        offset = np.cumsum(np.r_[0, nrecords])
        seen = []
        for (a, n, i, j) in zip(seqarrays, fldnames, offset[:-1], offset[1:]):
            names = a.dtype.names
            if names is None:
                output['f%i' % len(seen)][i:j] = a
            else:
                for name in n:
                    output[name][i:j] = a[name]
                    if name not in seen:
                        seen.append(name)
    # Apply per-field defaults, then convert to the requested output type.
    return _fix_output(_fix_defaults(output, defaults),
                       usemask=usemask, asrecarray=asrecarray)

1324 

1325 

1326def _find_duplicates_dispatcher( 

1327 a, key=None, ignoremask=None, return_index=None): 

1328 return (a,) 

1329 

1330 

@array_function_dispatch(_find_duplicates_dispatcher)
def find_duplicates(a, key=None, ignoremask=True, return_index=False):
    """
    Find the duplicates in a structured array along a given key

    Parameters
    ----------
    a : array-like
        Input array
    key : {string, None}, optional
        Name of the fields along which to check the duplicates.
        If None, the search is performed by records
    ignoremask : {True, False}, optional
        Whether masked data should be discarded or considered as duplicates.
    return_index : {False, True}, optional
        Whether to return the indices of the duplicated values.

    Examples
    --------
    >>> from numpy.lib import recfunctions as rfn
    >>> ndtype = [('a', int)]
    >>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3],
    ...         mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
    >>> rfn.find_duplicates(a, ignoremask=True, return_index=True)
    (masked_array(data=[(1,), (1,), (2,), (2,)],
                 mask=[(False,), (False,), (False,), (False,)],
           fill_value=(999999,),
                dtype=[('a', '<i8')]), array([0, 1, 3, 4]))
    """
    a = np.asanyarray(a).ravel()
    # Get a dictionary of fields
    fields = get_fieldstructure(a.dtype)
    # Get the sorting data (by selecting the corresponding field);
    # walk down through any parent fields of `key` to reach it.
    base = a
    if key:
        for f in fields[key]:
            base = base[f]
        base = base[key]
    # Get the sorting indices and the sorted data
    sortidx = base.argsort()
    sortedbase = base[sortidx]
    sorteddata = sortedbase.filled()
    # Compare the sorting data: flag[i] marks sorteddata[i+1] == sorteddata[i]
    flag = (sorteddata[:-1] == sorteddata[1:])
    # If masked data must be ignored, set the flag to false where needed
    if ignoremask:
        sortedmask = sortedbase.recordmask
        flag[sortedmask[1:]] = False
    flag = np.concatenate(([False], flag))
    # We need to take the point on the left as well (else we're missing it)
    flag[:-1] = flag[:-1] + flag[1:]
    duplicates = a[sortidx][flag]
    if return_index:
        return (duplicates, sortidx[flag])
    else:
        return duplicates

1387 

1388 

1389def _join_by_dispatcher( 

1390 key, r1, r2, jointype=None, r1postfix=None, r2postfix=None, 

1391 defaults=None, usemask=None, asrecarray=None): 

1392 return (r1, r2) 

1393 

1394 

@array_function_dispatch(_join_by_dispatcher)
def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
            defaults=None, usemask=True, asrecarray=False):
    """
    Join arrays `r1` and `r2` on key `key`.

    The key should be either a string or a sequence of string corresponding
    to the fields used to join the array.  An exception is raised if the
    `key` field cannot be found in the two input arrays.  Neither `r1` nor
    `r2` should have any duplicates along `key`: the presence of duplicates
    will make the output quite unreliable. Note that duplicates are not
    looked for by the algorithm.

    Parameters
    ----------
    key : {string, sequence}
        A string or a sequence of strings corresponding to the fields used
        for comparison.
    r1, r2 : arrays
        Structured arrays.
    jointype : {'inner', 'outer', 'leftouter'}, optional
        If 'inner', returns the elements common to both r1 and r2.
        If 'outer', returns the common elements as well as the elements of
        r1 not in r2 and the elements of r2 not in r1.
        If 'leftouter', returns the common elements and the elements of r1
        not in r2.
    r1postfix : string, optional
        String appended to the names of the fields of r1 that are present
        in r2 but absent of the key.
    r2postfix : string, optional
        String appended to the names of the fields of r2 that are present
        in r1 but absent of the key.
    defaults : {dictionary}, optional
        Dictionary mapping field names to the corresponding default values.
    usemask : {True, False}, optional
        Whether to return a MaskedArray (or MaskedRecords is
        `asrecarray==True`) or a ndarray.
    asrecarray : {False, True}, optional
        Whether to return a recarray (or MaskedRecords if `usemask==True`)
        or just a flexible-type ndarray.

    Notes
    -----
    * The output is sorted along the key.
    * A temporary array is formed by dropping the fields not in the key for
      the two arrays and concatenating the result. This array is then
      sorted, and the common entries selected. The output is constructed by
      filling the fields with the selected entries. Matching is not
      preserved if there are some duplicates...

    """
    # Check jointype
    if jointype not in ('inner', 'outer', 'leftouter'):
        raise ValueError(
                "The 'jointype' argument should be in 'inner', "
                "'outer' or 'leftouter' (got '%s' instead)" % jointype
                )
    # If we have a single key, put it in a tuple
    if isinstance(key, str):
        key = (key,)

    # Check the keys
    if len(set(key)) != len(key):
        dup = next(x for n,x in enumerate(key) if x in key[n+1:])
        raise ValueError("duplicate join key %r" % dup)
    for name in key:
        if name not in r1.dtype.names:
            raise ValueError('r1 does not have key field %r' % name)
        if name not in r2.dtype.names:
            raise ValueError('r2 does not have key field %r' % name)

    # Make sure we work with ravelled arrays
    r1 = r1.ravel()
    r2 = r2.ravel()
    # Fixme: nb2 below is never used. Commenting out for pyflakes.
    # (nb1, nb2) = (len(r1), len(r2))
    nb1 = len(r1)
    (r1names, r2names) = (r1.dtype.names, r2.dtype.names)

    # Check the names for collision
    collisions = (set(r1names) & set(r2names)) - set(key)
    if collisions and not (r1postfix or r2postfix):
        msg = "r1 and r2 contain common names, r1postfix and r2postfix "
        msg += "can't both be empty"
        raise ValueError(msg)

    # Make temporary arrays of just the keys
    #  (use order of keys in `r1` for back-compatibility)
    key1 = [ n for n in r1names if n in key ]
    r1k = _keep_fields(r1, key1)
    r2k = _keep_fields(r2, key1)

    # Concatenate the two arrays for comparison
    aux = ma.concatenate((r1k, r2k))
    idx_sort = aux.argsort(order=key)
    aux = aux[idx_sort]
    #
    # Get the common keys: an entry is "common" if it equals its sorted
    # neighbor (the OR with the shifted flags also marks the left member
    # of each equal pair).
    flag_in = ma.concatenate(([False], aux[1:] == aux[:-1]))
    flag_in[:-1] = flag_in[1:] + flag_in[:-1]
    idx_in = idx_sort[flag_in]
    # Indices below nb1 came from r1; the rest came from r2.
    idx_1 = idx_in[(idx_in < nb1)]
    idx_2 = idx_in[(idx_in >= nb1)] - nb1
    (r1cmn, r2cmn) = (len(idx_1), len(idx_2))
    if jointype == 'inner':
        (r1spc, r2spc) = (0, 0)
    elif jointype == 'outer':
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        idx_2 = np.concatenate((idx_2, idx_out[(idx_out >= nb1)] - nb1))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, len(idx_2) - r2cmn)
    elif jointype == 'leftouter':
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, 0)
    # Select the entries from each input
    (s1, s2) = (r1[idx_1], r2[idx_2])
    #
    # Build the new description of the output array .......
    # Start with the key fields
    ndtype = _get_fieldspec(r1k.dtype)

    # Add the fields from r1
    for fname, fdtype in _get_fieldspec(r1.dtype):
        if fname not in key:
            ndtype.append((fname, fdtype))

    # Add the fields from r2
    for fname, fdtype in _get_fieldspec(r2.dtype):
        # Have we seen the current name already ?
        # we need to rebuild this list every time
        names = list(name for name, dtype in ndtype)
        try:
            nameidx = names.index(fname)
        except ValueError:
            #... we haven't: just add the description to the current list
            ndtype.append((fname, fdtype))
        else:
            # collision
            _, cdtype = ndtype[nameidx]
            if fname in key:
                # The current field is part of the key: take the largest dtype
                ndtype[nameidx] = (fname, max(fdtype, cdtype))
            else:
                # The current field is not part of the key: add the suffixes,
                # and place the new field adjacent to the old one
                ndtype[nameidx:nameidx + 1] = [
                    (fname + r1postfix, cdtype),
                    (fname + r2postfix, fdtype)
                ]
    # Rebuild a dtype from the new fields
    ndtype = np.dtype(ndtype)
    # Find the largest nb of common fields :
    # r1cmn and r2cmn should be equal, but...
    cmn = max(r1cmn, r2cmn)
    # Construct an empty array
    output = ma.masked_all((cmn + r1spc + r2spc,), dtype=ndtype)
    names = output.dtype.names
    # Copy the common rows first, then (for outer joins) the rows unique
    # to each input, renaming collided fields with the postfixes.
    for f in r1names:
        selected = s1[f]
        if f not in names or (f in r2names and not r2postfix and f not in key):
            f += r1postfix
        current = output[f]
        current[:r1cmn] = selected[:r1cmn]
        if jointype in ('outer', 'leftouter'):
            current[cmn:cmn + r1spc] = selected[r1cmn:]
    for f in r2names:
        selected = s2[f]
        if f not in names or (f in r1names and not r1postfix and f not in key):
            f += r2postfix
        current = output[f]
        current[:r2cmn] = selected[:r2cmn]
        if (jointype == 'outer') and r2spc:
            current[-r2spc:] = selected[r2cmn:]
    # Sort and finalize the output
    output.sort(order=key)
    kwargs = dict(usemask=usemask, asrecarray=asrecarray)
    return _fix_output(_fix_defaults(output, defaults), **kwargs)

1573 

1574 

1575def _rec_join_dispatcher( 

1576 key, r1, r2, jointype=None, r1postfix=None, r2postfix=None, 

1577 defaults=None): 

1578 return (r1, r2) 

1579 

1580 

@array_function_dispatch(_rec_join_dispatcher)
def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
             defaults=None):
    """
    Join arrays `r1` and `r2` on keys.
    Alternative to join_by, that always returns a np.recarray.

    See Also
    --------
    join_by : equivalent function
    """
    # Delegate to join_by, forcing an unmasked recarray result.
    return join_by(key, r1, r2, jointype=jointype, r1postfix=r1postfix,
                   r2postfix=r2postfix, defaults=defaults,
                   usemask=False, asrecarray=True)