Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""A collection of functions designed to help I/O with ascii files. 

2 

3""" 

4__docformat__ = "restructuredtext en" 

5 

6import numpy as np 

7import numpy.core.numeric as nx 

8from numpy.compat import asbytes, asunicode, bytes 

9 

10 

11def _decode_line(line, encoding=None): 

12 """Decode bytes from binary input streams. 

13 

14 Defaults to decoding from 'latin1'. That differs from the behavior of 

15 np.compat.asunicode that decodes from 'ascii'. 

16 

17 Parameters 

18 ---------- 

19 line : str or bytes 

20 Line to be decoded. 

21 

22 Returns 

23 ------- 

24 decoded_line : unicode 

25 Unicode in Python 2, a str (unicode) in Python 3. 

26 

27 """ 

28 if type(line) is bytes: 

29 if encoding is None: 

30 line = line.decode('latin1') 

31 else: 

32 line = line.decode(encoding) 

33 

34 return line 

35 

36 

37def _is_string_like(obj): 

38 """ 

39 Check whether obj behaves like a string. 

40 """ 

41 try: 

42 obj + '' 

43 except (TypeError, ValueError): 

44 return False 

45 return True 

46 

47 

48def _is_bytes_like(obj): 

49 """ 

50 Check whether obj behaves like a bytes object. 

51 """ 

52 try: 

53 obj + b'' 

54 except (TypeError, ValueError): 

55 return False 

56 return True 

57 

58 

def has_nested_fields(ndtype):
    """
    Tell whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True if any field of `ndtype` has field names of its own, False
        otherwise (including when `ndtype` has no fields at all).

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    # ``names`` is None for unstructured dtypes: treat that as "no fields".
    field_names = ndtype.names or ()
    return any(ndtype[key].names is not None for key in field_names)

84 

85 

def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a (possibly nested) structured dtype into a flat list of
    base dtypes.

    Nested fields are expanded recursively, in field order. The field
    names themselves are not part of the result.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : bool, optional
        If True, a field with a shape contributes one entry per element
        of that shape. If False (default), it contributes a single entry.

    Returns
    -------
    types : list of dtype
        The base dtypes, one per leaf field (or per element when
        `flatten_base` is True).

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
    ...                ('block', int, (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('S4'),
     dtype('float64'),
     dtype('float64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64')]

    """
    field_names = ndtype.names
    if field_names is not None:
        # Structured dtype: recurse into each field, preserving order.
        collapsed = []
        for key in field_names:
            sub_dtype = ndtype.fields[key][0]
            collapsed.extend(flatten_dtype(sub_dtype, flatten_base))
        return collapsed

    # Leaf dtype: optionally repeat the base once per element of the shape.
    if not flatten_base:
        return [ndtype.base]
    return [ndtype.base] * int(np.prod(ndtype.shape))

131 

132 

class LineSplitter:
    """
    Callable that splits a line of text into fields, either at a
    delimiter or at fixed positions.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
        ``None``, an empty string or ``0`` select splitting on
        whitespace (``str.split(None)``).
    comments : str, optional
        Character used to mark the beginning of a comment. Default is '#'.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.
    encoding : str, optional
        Encoding forwarded to `_decode_line` for the lines passed to the
        instance when it is called. `delimiter` and `comments` themselves
        are decoded with the `_decode_line` default.

    """

    def autostrip(self, method):
        """
        Wrap `method` so that every string it returns is stripped.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            Function of one argument that calls `method` and returns a
            list holding each resulting string with surrounding
            white-space removed.

        """
        def wrapped(line):
            return [piece.strip() for piece in method(line)]

        return wrapped

    def __init__(self, delimiter=None, comments='#', autostrip=True,
                 encoding=None):
        delimiter = _decode_line(delimiter)
        comments = _decode_line(comments)

        self.comments = comments

        if delimiter is None or isinstance(delimiter, str):
            # A character delimiter; '' is normalized to None (whitespace).
            splitter = self._delimited_splitter
            delimiter = delimiter or None
        elif hasattr(delimiter, '__iter__'):
            # A sequence of field widths, converted once into slices.
            splitter = self._variablewidth_splitter
            bounds = np.cumsum([0] + list(delimiter))
            delimiter = [slice(start, stop)
                         for start, stop in zip(bounds[:-1], bounds[1:])]
        elif int(delimiter):
            # A single non-zero field width.
            splitter = self._fixedwidth_splitter
            delimiter = int(delimiter)
        else:
            # Anything that converts to 0 falls back to whitespace splitting.
            splitter = self._delimited_splitter
            delimiter = None

        self.delimiter = delimiter
        self._handyman = self.autostrip(splitter) if autostrip else splitter
        self.encoding = encoding

    def _delimited_splitter(self, line):
        """Drop the comment, strip spaces/newlines, split at the delimiter."""
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        content = line.strip(" \r\n")
        return content.split(self.delimiter) if content else []

    def _fixedwidth_splitter(self, line):
        """Drop the comment and newlines, then cut into equal-width chunks."""
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        content = line.strip("\r\n")
        if not content:
            return []
        width = self.delimiter
        return [content[start:start + width]
                for start in range(0, len(content), width)]

    def _variablewidth_splitter(self, line):
        """Drop the comment, then cut the line with the precomputed slices."""
        marker = self.comments
        if marker is not None:
            line = line.split(marker)[0]
        if not line:
            return []
        return [line[field] for field in self.delimiter]

    def __call__(self, line):
        splitter = self._handyman
        return splitter(_decode_line(line, self.encoding))

227 

228 

class NameValidator:
    """
    Object to validate a list of strings to use as field names.

    The strings are stripped of any non alphanumeric character, and spaces
    are replaced by '_'. During instantiation, the user can define a list
    of names to exclude, as well as a list of invalid characters. Names in
    the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a list of valid names will be created. The `__call__`
    method accepts an optional keyword "default" that sets the default name
    in case of ambiguity. By default this is 'f', so that names will
    default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default
        list ['return', 'file', 'print']. Excluded names are appended an
        underscore: for example, `file` becomes `file_` if supplied.
        The sequence is copied; the caller's object is never modified.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names. The double-quote character is always deleted as well.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Raises
    ------
    ValueError
        If `case_sensitive` is a string that starts with neither 'u'
        nor 'l'.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive=False)
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')

    """

    # Class-level defaults. They are copied, never modified, by instances.
    defaultexcludelist = ['return', 'file', 'print']
    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list. A new list is built so that the
        # caller's sequence is left untouched (and so that tuples or other
        # non-list sequences are accepted).
        user_excluded = [] if excludelist is None else list(excludelist)
        self.excludelist = user_excluded + list(self.defaultexcludelist)
        # Process the characters to delete. The default set is copied so
        # that adding '"' below does not alter the shared class attribute.
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Process the case option.
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or case_sensitive.startswith('u'):
            self.case_converter = lambda x: x.upper()
        elif case_sensitive.startswith('l'):
            self.case_converter = lambda x: x.lower()
        else:
            msg = 'unrecognized case_sensitive value %s.' % case_sensitive
            raise ValueError(msg)

        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated. A single string is treated as a
            one-element sequence.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str or None
            The validated field names. ``None`` is returned when both
            `names` and `nbfields` are ``None``.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks
        if names is None:
            if nbfields is None:
                return None
            names = []
        if isinstance(names, str):
            names = [names]
        if nbfields is not None:
            nbnames = len(names)
            if nbnames < nbfields:
                # Pad with empty names, which get a default name below.
                names = list(names) + [''] * (nbfields - nbnames)
            elif nbnames > nbfields:
                names = names[:nbfields]
        # Local shortcuts
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        # Accumulators
        validatednames = []
        seen = {}      # validated name -> number of times already produced
        nbempty = 0    # next index to try for a default name

        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Pick the first default name that does not collide with
                # one of the input names.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Disambiguate repeated names with a numeric suffix.
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        """Shorthand for `validate` with the same arguments."""
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)

383 

384 

def str2bool(value):
    """
    Convert the textual form of a boolean into a ``bool``.

    Parameters
    ----------
    value : str
        The string that is transformed to a boolean.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> np.lib._iotools.str2bool('TRUE')
    True
    >>> np.lib._iotools.str2bool('false')
    False

    """
    # Compare on the upper-cased text so that the match ignores case.
    normalized = value.upper()
    if normalized == 'TRUE':
        return True
    if normalized == 'FALSE':
        return False
    raise ValueError("Invalid boolean")

419 

420 

class ConverterError(Exception):
    """
    Error signalling that a converter for string values failed.

    """

427 

428 

class ConverterLockError(ConverterError):
    """
    Error signalling an attempt to upgrade a converter that is locked.

    """

435 

436 

class ConversionWarning(UserWarning):
    """
    Warning category used when a string converter runs into a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.

    """

448 

449 

class StringConverter:
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    missing_values : set of str
        Strings that are recognized as a missing value.
    _status : int
        Integer representing the order of the conversion, that is, the
        index of the current entry in `_mapper`.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`.  If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : {None, sequence of str}, optional
        ``None`` or sequence of strings indicating a missing value. If ``None``
        then missing values are indicated by empty entries. The default is
        ``None``.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    # Ordered list of (type, conversion function, default value). The
    # position of an entry is what `_status` refers to.
    _mapper = [(nx.bool_, str2bool, False),
               (nx.int_, int, -1),]

    # On 32-bit systems, we need to make sure that we explicitly include
    # nx.int64 since nx.int_ is nx.int32.
    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
        _mapper.append((nx.int64, int, -1))

    _mapper.extend([(nx.float64, float, nx.nan),
                    (nx.complex128, complex, nx.nan + 0j),
                    (nx.longdouble, nx.longdouble, nx.nan),
                    (nx.unicode_, asunicode, '???'),
                    (nx.string_, asbytes, '???'),
                    # If a non-default dtype is passed, fall back to generic
                    # ones (should only be used for the converter)
                    (nx.integer, int, -1),
                    (nx.floating, float, nx.nan),
                    (nx.complexfloating, complex, nx.nan + 0j),])

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""

        # This is a bit annoying. We want to return the "general" type in most
        # cases (ie. "string" rather than "S10"), but we want to return the
        # specific type for datetime64 (ie. "datetime64[us]" rather than
        # "datetime64").
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper.  The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions. A sequence whose first
            item is a tuple or list is taken to hold ready-made mapper
            entries, which are inserted as they are.
        default : any or sequence, optional
            Default value associated with `func`. When `func` is a
            sequence of functions, a sequence of defaults; if it is
            shorter than `func`, the missing defaults are ``None``.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single function
        if hasattr(func, '__call__'):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            # Sequence of complete mapper entries: insert them unchanged.
            if isinstance(func[0], (tuple, list)):
                for _ in func:
                    cls._mapper.insert(-1, _)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with one None per function lacking a default. This
                # must be `extend`: `append` would add a single nested list
                # and the zip below would then drop trailing functions.
                default.extend([None] * (len(func) - len(default)))
            for fct, dft in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    @classmethod
    def _find_map_entry(cls, dtype):
        """
        Return ``(index, (type, function, default))`` of the mapper entry
        that best matches `dtype`.

        Raises ``LookupError`` when no entry matches.
        """
        # if a converter for the specific dtype is available use that
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if dtype.type == deftype:
                return i, (deftype, func, default_def)

        # otherwise find an inexact match
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if np.issubdtype(dtype.type, deftype):
                return i, (deftype, func, default_def)

        raise LookupError

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func('0')
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)

            # find the best match in our mapper
            try:
                self._status, (_, func, default_def) = self._find_map_entry(dtype)
            except LookupError:
                # no match
                self.default = default
                _, func, _ = self._mapper[-1]
                self._status = 0
            else:
                # use the found default only if we did not already have one
                if default is None:
                    self.default = default_def
                else:
                    self.default = default

            # If the input was a dtype, set the function to the last we saw
            if self.func is None:
                self.func = func

            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        # The empty string is always part of the set.
        if missing_values is None:
            self.missing_values = {''}
        else:
            if isinstance(missing_values, str):
                missing_values = missing_values.split(",")
            self.missing_values = set(list(missing_values) + [''])

        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        # Remember the default as resolved above so that `_do_upgrade` can
        # keep it instead of the mapper's default.
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`, returning the default if `func` raises
        ``ValueError``."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """
        Convert `value`; only strings listed in `missing_values` may fall
        back to the default, anything else raises ``ValueError``.
        """
        try:

            # We check if we can convert the value using the current function
            new_value = self.func(value)

            # In addition to having to check whether func can convert the
            # value, we also have to make sure that we don't get overflow
            # errors for integers.
            if self.func is int:
                try:
                    np.array(value, dtype=self.type)
                except OverflowError:
                    raise ValueError

            # We're still here so we can now return the new value
            return new_value

        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)

    def __call__(self, value):
        return self._callingfunction(value)

    def _do_upgrade(self):
        """
        Move to the next entry of the mapper.

        Raises ``ConverterLockError`` if the converter is locked and
        ``ConverterError`` if `_status` equals the length of the mapper.
        """
        # Raise an exception if we locked the converter...
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        _statusmax = len(self._mapper)
        # Complains if we try to upgrade by the maximum
        _status = self._status
        if _status == _statusmax:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        elif _status < _statusmax - 1:
            _status += 1
        self.type, self.func, default = self._mapper[_status]
        self._status = _status
        # A default resolved at construction time wins over the mapper's.
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried.  The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        """
        self._checked = True
        try:
            return self._strict_call(value)
        except ValueError:
            self._do_upgrade()
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until every item of `value` converts.

        `value` may be a single item or an iterable of items; each one is
        passed to the strict conversion and, on ``ValueError``, the
        converter is upgraded and the whole of `value` is tried again.
        Nothing is returned.
        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value,)
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            self._do_upgrade()
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values='', locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : {sequence of str, None}, optional
            Sequence of strings indicating a missing value. If ``None``, then
            the existing `missing_values` are cleared. The default is `''`.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Raises
        ------
        TypeError
            If an item of `missing_values` is not a string.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked

        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            # Infer the output type from converting a sample value.
            try:
                tester = func(testing_value or '1')
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))

        # Add the missing values to the existing set or clear it.
        if missing_values is None:
            # Clear all missing values even though the ctor initializes it to
            # set(['']) when the argument is None.
            self.missing_values = set()
        else:
            if not np.iterable(missing_values):
                missing_values = [missing_values]
            if not all(isinstance(v, str) for v in missing_values):
                raise TypeError("missing_values must be strings or unicode")
            self.missing_values.update(missing_values)

817 

818 

def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Build a `np.dtype` from a loose description and optional field names.

    The input is first handed to `np.dtype`; if that fails with a
    ``TypeError`` it is treated as a sequence of formats. Field names are
    passed through a `NameValidator` before being applied.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Returns
    -------
    ndtype : dtype
        The resulting data-type.

    Examples
    --------
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

    """
    try:
        ndtype = np.dtype(ndtype)
    except TypeError:
        # Not directly a dtype: treat the input as a sequence of formats
        # and pair it with validated names.
        validator = NameValidator(**validationargs)
        field_count = len(ndtype)
        if names is None:
            names = [''] * len(ndtype)
        elif isinstance(names, str):
            names = names.split(",")
        names = validator(names, nbfields=field_count, defaultfmt=defaultfmt)
        return np.dtype({'formats': ndtype, 'names': names})

    if names is not None:
        # Names were given explicitly.
        validator = NameValidator(**validationargs)
        if isinstance(names, str):
            names = names.split(",")
        if ndtype.names is not None:
            # Structured dtype: rename its fields with the validated names.
            ndtype.names = validator(names, nbfields=len(ndtype.names),
                                     defaultfmt=defaultfmt)
        else:
            # Simple dtype: repeat it once per requested name.
            formats = (ndtype.type,) * len(names)
            names = validator(names, defaultfmt=defaultfmt)
            ndtype = np.dtype(list(zip(names, formats)))
        return ndtype

    if ndtype.names is None:
        # Unstructured dtype without names: nothing to validate.
        return ndtype

    validator = NameValidator(**validationargs)
    auto_names = tuple("f%i" % i for i in range(len(ndtype.names)))
    if ndtype.names != auto_names or defaultfmt == "f%i":
        # Fields carry their own names (or the standard format is wanted):
        # validate the names as they are.
        ndtype.names = validator(ndtype.names, defaultfmt=defaultfmt)
    else:
        # Fields carry the automatic f0, f1, ... names while another format
        # was requested: regenerate them from `defaultfmt`.
        ndtype.names = validator([''] * len(ndtype.names),
                                 defaultfmt=defaultfmt)
    return ndtype