Edit on GitHub

parsetypes

This package provides tools for parsing serialised data to recover their original underlying types.

The TypeParser class provides configurable type inference and parsing. This can be initialised with different settings to, for example:

  • treat inf as either a float or a normal string
  • give exact Decimal values instead of floats
  • detect inline lists
 1"""
 2	This package provides tools for parsing serialised data to recover their original underlying types.
 3
 4	The `TypeParser` class provides configurable type inference and parsing. This can be initialised with different settings to, for example:
 5	- treat `inf` as either a float or a normal string
 6	- give exact Decimal values instead of floats
 7	- detect inline lists
 8"""
 9
10
11__version__ = "0.2.3"
12
13from ._common import AnyScalar, AnyScalarType, AnyValue, AnyValueType, GenericValue, Nullable
14from ._parser import TypeParser
15from ._reduce_types import reduce_types
16
17__all__ = ('TypeParser', 'reduce_types')
class TypeParser:
 60class TypeParser:
 61	"""
 62		A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.
 63
 64		Instances of this class can be configured with different settings for the parser and inferrer. See the constructor for more details about the available options.
 65	"""
 66
	def __init__(self,
		*,
		trim: bool=True,
		use_decimal: bool=False,
		list_delimiter: Optional[str]=None,
		none_values: Iterable[str]=[""],
		none_case_sensitive: bool=False,
		true_values: Iterable[str]=["true"],
		false_values: Iterable[str]=["false"],
		bool_case_sensitive: bool=False,
		int_case_sensitive: bool=False,
		inf_values: Iterable[str]=[],
		nan_values: Iterable[str]=[],
		float_case_sensitive: bool=False,
		case_sensitive: Optional[bool]=None,
	):
		"""
			Initialise a new parser

			Parameters
			----------
			`trim`
			: whether leading and trailing whitespace should be stripped from strings

			`use_decimal`
			: whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). This only applies to methods that infer the type (`infer()` and `infer_*()`); it does not affect methods where the type is explicitly specified (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`).

			`list_delimiter`
			: the delimiter used for identifying lists and for separating list items. If None, the parser does not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead.

			`none_values`
			: strings that represent the value None

			`none_case_sensitive`
			: whether matches against `none_values` are case-sensitive

			`true_values`
			: strings that represent the bool value True

			`false_values`
			: strings that represent the bool value False

			`bool_case_sensitive`
			: whether matches against `true_values` and `false_values` are case-sensitive

			`int_case_sensitive`
			: whether checks for int are case-sensitive. This usually only matters for scientific notation, where the mantissa and exponent are separated by `e`.

			`inf_values`
			: strings that represent the float or Decimal value of infinity. Each may be prefixed with a negative sign to represent negative infinity.

			`nan_values`
			: strings that represent a float or Decimal that is NaN (not a number)

			`float_case_sensitive`
			: whether checks for float are case-sensitive. This applies to matches against `inf_values` and `nan_values`, and to the `e` of scientific notation.

			`case_sensitive`
			: if not None, overrides all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive` and `float_case_sensitive` with this value

			Raises
			------
			`ValueError` if any of the options would lead to ambiguities during parsing
		"""

		# A global setting takes precedence over every individual one
		if case_sensitive is not None:
			none_case_sensitive = case_sensitive
			int_case_sensitive = case_sensitive
			bool_case_sensitive = case_sensitive
			float_case_sensitive = case_sensitive

		# The default lists are only ever read, never mutated: every special-value
		# collection is copied into a fresh set below.
		self.trim = trim
		if self.trim:
			none_values = (value.strip() for value in none_values)
			true_values = (value.strip() for value in true_values)
			false_values = (value.strip() for value in false_values)
			inf_values = (value.strip() for value in inf_values)
			nan_values = (value.strip() for value in nan_values)

		self.use_decimal = use_decimal
		self.list_delimiter = list_delimiter

		# Case-insensitive matching is implemented by storing lower-cased values
		self.none_case_sensitive = none_case_sensitive
		if not self.none_case_sensitive:
			none_values = (value.lower() for value in none_values)
		self.none_values = set(none_values)

		self.bool_case_sensitive = bool_case_sensitive
		if not self.bool_case_sensitive:
			true_values = (value.lower() for value in true_values)
			false_values = (value.lower() for value in false_values)
		self.true_values = set(true_values)
		self.false_values = set(false_values)

		self.int_case_sensitive = int_case_sensitive

		self.float_case_sensitive = float_case_sensitive
		if not self.float_case_sensitive:
			inf_values = (value.lower() for value in inf_values)
			nan_values = (value.lower() for value in nan_values)
		self.inf_values = set(inf_values)
		self.nan_values = set(nan_values)

		# Unconfigurable default values
		self._negative_char = "-"
		self._negative_chars = {self._negative_char, "−"}
		self._sign_chars = self._negative_chars | {"+"}
		self._digit_chars = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}  # Because isdigit("²") == True, but int("²") is invalid
		self._digit_separators = {"_"}
		self._scientific_char = "e"
		self._float_separator = "."
		self._reserved_chars = self._sign_chars | self._digit_chars | self._digit_separators | {self._scientific_char} | {self._float_separator}

		# Check if any special values conflict with each other or with numeric syntax
		for name, special_values in [
			(_SpecialValue.LIST, [self.list_delimiter] if self.list_delimiter is not None else []),
			(_SpecialValue.NONE, self.none_values),
			(_SpecialValue.TRUE, self.true_values),
			(_SpecialValue.FALSE, self.false_values),
			(_SpecialValue.INF, self.inf_values),
			(_SpecialValue.NAN, self.nan_values),
		]:
			for special_value in special_values:
				if special_value in self._reserved_chars:
					raise ValueError(f"cannot use reserved char as {name.value}: {special_value}")

				if name != _SpecialValue.NONE and self.is_none(special_value):
					raise ValueError(f"cannot use None value as {name.value}: {special_value}")

				if (
					(name == _SpecialValue.TRUE and self.parse_bool(special_value) != True) or
					(name == _SpecialValue.FALSE and self.parse_bool(special_value) != False) or
					(name != _SpecialValue.TRUE and name != _SpecialValue.FALSE and self.is_bool(special_value))
				):
					raise ValueError(f"cannot use bool value as {name.value}: {special_value}")

				if self.is_int(special_value):
					raise ValueError(f"cannot use int value as {name.value}: {special_value}")

				if self.use_decimal:
					if (
						(name == _SpecialValue.INF and self.parse_decimal(special_value) != Decimal(math.inf)) or
						(name == _SpecialValue.NAN and not self.parse_decimal(special_value).is_nan()) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use Decimal value as {name.value}: {special_value}")
				else:
					if (
						(name == _SpecialValue.INF and self.parse_float(special_value) != math.inf) or
						# NaN never compares equal and identity is not guaranteed, so test with isnan()
						(name == _SpecialValue.NAN and not math.isnan(self.parse_float(special_value))) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use float value as {name.value}: {special_value}")
221
222
223	def is_none(self, value: str) -> bool:
224		"""
225			Check if a string represents the value None
226
227			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
228
229			Parameters
230			----------
231			`value`
232			: string to be checked
233
234			Returns
235			-------
236			whether it is None
237
238			Examples
239			--------
240			```python
241			parser = TypeParser()
242			parser.parse_bool("")     # True
243			parser.parse_bool("abc")  # False
244			```
245		"""
246		if self.trim:
247			value = value.strip()
248		if not self.bool_case_sensitive:
249			value = value.lower()
250
251		if value in self.none_values:
252			return True
253		else:
254			return False
255
256
257	def is_bool(self, value: str) -> bool:
258		"""
259			Check if a string represents a bool
260
261			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
262
263			Parameters
264			----------
265			`value`
266			: string to be checked
267
268			Returns
269			-------
270			whether it is a bool
271
272			Examples
273			--------
274			```python
275			parser = TypeParser()
276			parser.is_bool("true")  # True
277			parser.is_bool("")      # True
278			parser.is_bool("abc")   # False
279			```
280		"""
281		if self.trim:
282			value = value.strip()
283
284		if not self.bool_case_sensitive:
285			value = value.lower()
286		if value in self.true_values:
287			return True
288		if value in self.false_values:
289			return True
290
291		return False
292
293
294	def is_int(self, value: str, *, allow_sign: bool=True, allow_negative: bool=True, allow_scientific: bool=True) -> bool:
295		"""
296			Check if a string represents an int
297
298			Parameters
299			----------
300			`value`
301			: string to be checked
302
303			`allow_negative`
304			: whether to accept negative values
305
306			`allow_sign`
307			: whether to accept signed values. If False, it implies that `allow_negative` is False also.
308
309			`allow_scientific`
310			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
311
312			Returns
313			-------
314			whether it is an int
315
316			Examples
317			--------
318			```python
319			parser = TypeParser()
320			parser.is_int("0")    # True
321			parser.is_int("-1")   # True
322			parser.is_int("abc")  # False
323			parser.is_int("")     # False
324			```
325		"""
326		if self.trim:
327			value = value.strip()
328
329		if len(value) == 0:
330			return False
331
332		if allow_scientific:
333			value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
334			if exp is not None:
335				return self.is_int(
336					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
337				) and self.is_int(
338					exp, allow_sign=True, allow_negative=False, allow_scientific=False
339				)
340
341		if value[0] in self._sign_chars:
342			if len(value) == 1:
343				return False
344			if not allow_sign:
345				return False
346			if not allow_negative and value[0] in self._negative_chars:
347				return False
348			value = value[1:]
349		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
350			return False
351
352		prev_separated = False
353		for c in value:
354			if c in self._digit_separators:
355				if prev_separated:
356					return False
357				prev_separated = True
358			else:
359				prev_separated = False
360				if c not in self._digit_chars:
361					return False
362		return True
363
364
365	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
366		"""
367			Check if a string represents a float (or equivalently, a Decimal)
368
369			This function will also return True if the string represents an int.
370
371			Alias: `is_decimal()`
372
373			Parameters
374			----------
375			`value`
376			: string to be checked
377
378			`allow_scientific`
379			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
380
381			`allow_inf`
382			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
383
384			`allow_nan`
385			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
386
387			Returns
388			-------
389			whether it is a float or Decimal
390
391			Examples
392			--------
393			```python
394			parser = TypeParser()
395			parser.is_float("1.")       # True
396			parser.is_float("12.3e-2")  # True
397			parser.is_float("abc")      # False
398			parser.is_float("")         # False
399			```
400		"""
401		if self.trim:
402			value = value.strip()
403
404		if len(value) > 0 and value[0] in self._sign_chars:
405			value = value[1:]
406
407		if self.float_case_sensitive:
408			special_value = value
409		else:
410			special_value = value.lower()
411		if allow_inf and special_value in self.inf_values:
412			return True
413		if allow_nan and special_value in self.nan_values:
414			return True
415
416		if len(value) == 0:
417			return False
418
419		if allow_scientific:
420			value, exp = _decompose_string_pair(value, self._scientific_char, self.float_case_sensitive)
421			if exp is not None:
422				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)
423
424		value, frac = _decompose_string_pair(value, self._float_separator, self.float_case_sensitive)
425		if frac is not None:
426			if value == "" and frac == "":
427				return False
428			return (
429				self.is_int(value, allow_sign=True, allow_negative=False, allow_scientific=False) or value == ""
430			) and (
431				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
432			)
433
434		return self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=False)
435
436
437	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
438		"""
439			Alias of `is_float()`
440		"""
441		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)
442
443
444	def parse_none(self, value: str) -> None:
445		"""
446			Parse a string and return it as the value None if possible
447
448			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
449
450			Parameters
451			----------
452			`value`
453			: string to be parsed
454
455			Returns
456			-------
457			parsed None value
458
459			Raises
460			------
461			`ValueError` if `value` cannot be parsed
462
463			Examples
464			--------
465			```python
466			parser = TypeParser()
467			parser.parse_bool("")     # None
468			parser.parse_bool("abc")  # raises ValueError
469			```
470		"""
471		if self.is_none(value):
472			return None
473		else:
474			raise ValueError(f"not a none value: {value}")
475
476
477	def parse_bool(self, value: str) -> bool:
478		"""
479			Parse a string and return it as a bool if possible
480
481			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
482
483			Parameters
484			----------
485			`value`
486			: string to be parsed
487
488			Returns
489			-------
490			parsed bool value
491
492			Raises
493			------
494			`ValueError` if `value` cannot be parsed
495
496			Examples
497			--------
498			```python
499			parser = TypeParser()
500			parser.parse_bool("true")   # True
501			parser.parse_bool("FALSE")  # False
502			```
503		"""
504		if self.trim:
505			value = value.strip()
506
507		if self.bool_case_sensitive:
508			special_value = value
509		else:
510			special_value = value.lower()
511		if special_value in self.true_values:
512			return True
513		if special_value in self.false_values:
514			return False
515
516		raise ValueError(f"not a boolean: {value}")
517
518
519	def parse_int(self, value: str, *, allow_scientific: bool=True) -> int:
520		"""
521			Parse a string and return it as an int if possible
522
523			If the string represents a bool, it will be converted to `1` for True and `0` for False.
524
525			Parameters
526			----------
527			`value`
528			: string to be parsed
529
530			`allow_scientific`
531			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
532
533			Returns
534			-------
535			parsed int value
536
537			Raises
538			------
539			`ValueError` if `value` cannot be parsed
540
541			Examples
542			--------
543			```python
544			parser = TypeParser()
545			parser.parse_int("0")    # 0
546			parser.parse_int("-1")   # -1
547			parser.parse_int("2e3")  # 2000
548			```
549		"""
550		if self.trim:
551			value = value.strip()
552
553		if self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=allow_scientific):
554			if allow_scientific:
555				value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
556				if exp is not None:
557					if value[0] in (self._negative_chars - {self._negative_char}):
558						value = self._negative_char + value[1:]
559					return int(value) * (10 ** int(exp))
560
561			if value[0] in (self._negative_chars - {self._negative_char}):
562				value = self._negative_char + value[1:]
563			return int(value)
564
565		elif self.is_bool(value):
566			return int(self.parse_bool(value))
567		else:
568			raise ValueError(f"not an integer: {value}")
569
570
571	def _parse_floatlike(self,
572		value: str,
573		converter: Callable[[Union[str, bool]], _FloatLike],
574		inf_value: _FloatLike,
575		nan_value: _FloatLike,
576		*,
577		allow_scientific: bool=True,
578		allow_inf: bool=True,
579		allow_nan: bool=True
580	) -> _FloatLike:
581		if self.trim:
582			value = value.strip()
583		if self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan):
584			if self.float_case_sensitive:
585				special_value = value
586			else:
587				special_value = value.lower()
588			if allow_inf and special_value in self.inf_values:
589				return inf_value
590			if allow_nan and special_value in self.nan_values:
591				return nan_value
592
593			if len(value) > 0 and value[0] in self._sign_chars:
594				positive_part = value[1:]
595				if self.float_case_sensitive:
596					special_value = positive_part
597				else:
598					special_value = positive_part.lower()
599				if allow_inf and special_value in self.inf_values:
600					if value[0] in self._negative_chars:
601						return -1 * inf_value
602					else:
603						return inf_value
604				if allow_nan and special_value in self.nan_values:
605					return nan_value
606
607				if value[0] in self._negative_chars:
608					value = self._negative_char + positive_part
609			return converter(value)
610		elif self.is_bool(value):
611			return converter(self.parse_bool(value))
612		else:
613			raise ValueError(f"not a {_FloatLike.__name__}: {value}")
614
615
616	def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
617		"""
618			Parse a string and return it as a (non-exact) float if possible
619
620			If the string represents a bool, it will be converted to `1.` for True and `0.` for False. If the string represents an int, it will be converted to a float also.
621
622			Behaves analogously to `parse_decimal()`, except that that returns an exact Decimal instead.
623
624			Parameters
625			----------
626			`value`
627			: string to be parsed
628
629			`allow_scientific`
630			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
631
632			`allow_inf`
633			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
634
635			`allow_nan`
636			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
637
638			Returns
639			-------
640			parsed float value
641
642			Raises
643			------
644			`ValueError` if `value` cannot be parsed
645
646			Examples
647			--------
648			```python
649			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
650			parser.parse_float("1.")       # 1.
651			parser.parse_float("1.23e2")   # 123.
652			parser.parse_float("1.23e-2")  # 0.0123
653			parser.parse_float("inf")      # math.inf
654			```
655		"""
656		return self._parse_floatlike(value, float, math.inf, math.nan,
657			allow_scientific=allow_scientific,
658			allow_inf=allow_inf,
659			allow_nan=allow_nan,
660		)
661
662
663	def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
664		"""
665			Parse a string and return it as an exact Decimal if possible
666
667			If the string represents a bool, it will be converted to `Decimal(1)` for True and `Decimal(0)` for False. If the string represents an int, it will be converted to a Decimal also.
668
669			Behaves analogously to `parse_float()`, except that that returns a non-exact float instead.
670
671			Parameters
672			----------
673			`value`
674			: string to be parsed
675
676			`allow_scientific`
677			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
678
679			`allow_inf`
680			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
681
682			`allow_nan`
683			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
684
685			Returns
686			-------
687			parsed Decimal value
688
689			Raises
690			------
691			`ValueError` if `value` cannot be parsed
692
693			Examples
694			--------
695			```python
696			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
697			parser.parse_decimal("1.")       # Decimal(1)
698			parser.parse_decimal("1.23e2")   # Decimal(123)
699			parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
700			parser.parse_decimal("inf")      # Decimal(math.inf)
701			```
702		"""
703		return self._parse_floatlike(value, Decimal, Decimal(math.inf), Decimal(math.nan),
704			allow_scientific=allow_scientific,
705			allow_inf=allow_inf,
706			allow_nan=allow_nan,
707		)
708
709
710	def infer(self, value: str) -> AnyValueType:
711		"""
712			Infer the underlying type of a string
713
714			Also check for inline lists if `self.list_delimiter` is not None.
715
716			Parameters
717			----------
718			`value`
719			: the string for which the type should be inferred
720
721			Returns
722			-------
723			inferred type
724
725			Examples
726			--------
727			```python
728			parser = TypeParser()
729			parser.infer("true")  # bool
730			parser.infer("2.0")   # float
731			parser.infer("abc")   # str
732			```
733		"""
734		if self.is_none(value):
735			return NoneType
736		if self.is_bool(value):
737			return bool
738		if self.is_int(value):
739			return int
740		if self.is_float(value):
741			if self.use_decimal:
742				return Decimal
743			else:
744				return float
745
746		if self.trim:
747			value = value.strip()
748
749		if self.list_delimiter is not None and self.list_delimiter in value:
750			subvalues = value.split(self.list_delimiter)
751			if self.trim:
752				subvalues = [subvalue.strip() for subvalue in subvalues]
753			reduced_type = reduce_types(self.infer(subvalue) for subvalue in subvalues)
754			r = list[reduced_type]
755			return r
756
757		return GenericValue
758
759
760	def infer_series(self, values: Iterable[str]) -> AnyValueType:
761		"""
762			Infer the underlying common type of a series of strings
763
764			If the values in the series do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
765
766			Parameters
767			----------
768			`values`
769			: series of strings for which the type should be inferred
770
771			Returns
772			-------
773			inferred type
774
775			Examples
776			--------
777			```python
778			parser = TypeParser()
779			parser.infer_series(["1", "2", "3.4"])       # float
780			parser.infer_series(["true", "false", "2"])  # int
781			parser.infer_series(["1", "2.3", "abc"])     # str
782			```
783		"""
784		return reduce_types(self.infer(value) for value in values)
785
786
787	def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
788		"""
789			Infer the underlying common type for each column of a table of strings
790
791			For each column, if the values do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
792
793			Note that the individual inferred types of every value in the table must be able to fit into memory.
794
795			Parameters
796			----------
797			`rows`
798			: table of strings for which the types should be inferred, in row-major order
799
800			Returns
801			-------
802			inferred types
803
804			Examples
805			--------
806			```python
807			parser = TypeParser()
808			parser.infer_table([
809				["1",   "true",  "1"],
810				["2",   "false", "2.3"],
811				["3.4", "2",     "abc"],
812			])
813			# [float, int, str]
814			```
815		"""
816		rows_iter = iter(rows)
817		first_row = next(rows_iter, None)
818		if first_row is None:
819			return []
820
821		num_cols = len(first_row)
822		if num_cols == 0:
823			return []
824
825		table = _TypeTable([[self.infer(value)] for value in first_row])
826		for row in rows_iter:
827			table.add_row([self.infer(value) for value in row])
828
829		return [reduce_types(col) for col in table.cols]
830
831
832	def _convert(self, value: str, t: AnyValueType) -> AnyValue:
833		base, type_args = _decompose_type(t)
834		if base == NoneType:
835			return None
836		elif base == bool:
837			return self.parse_bool(value)
838		elif base == int:
839			return self.parse_int(value)
840		elif base == Decimal:
841			return self.parse_decimal(value)
842		elif base == float:
843			return self.parse_float(value)
844		elif base == str:
845			return value
846		elif base == Nullable:
847			if self.is_none(value):
848				return None
849			else:
850				if type_args is not  None and len(type_args) == 1 and type_args[0] != str:
851					inner_type = type_args[0]
852					return self._convert(value, inner_type)
853				else:
854					return value
855		elif base == list:
856			subvalues = value.split(self.list_delimiter)
857			if self.trim:
858				subvalues = [subvalue.strip() for subvalue in subvalues]
859			if type_args is not None and len(type_args) == 1 and type_args[0] != str:
860				subtype = type_args[0]
861				return [self._convert(subvalue, subtype) for subvalue in subvalues]
862			else:
863				return subvalues
864		else:
865			return value
866
867
868	def parse(self, value: str) -> AnyValue:
869		"""
870			Parse a string and convert it to its underlying type
871
872			Parameters
873			----------
874			`value`
875			: the string to be parsed
876
877			Returns
878			-------
879			converted value
880
881			Examples
882			--------
883			```python
884			parser = TypeParser()
885			parser.parse("true")  # True
886			parser.parse("2.0")   # 2.
887			parser.parse("abc")   # "abc"
888			```
889		"""
890		return self._convert(value, self.infer(value))
891
892
893	def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
894		"""
895			Parse a series of strings and convert them to their underlying common type
896
897			If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
898
899			Parameters
900			----------
901			`values`
902			: series of strings to be parsed
903
904			Returns
905			-------
906			converted values
907
908			Examples
909			--------
910			```python
911			parser = TypeParser()
912			parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
913			parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
914			parser.parse_series(["true", "false", ""])  # [True, False, None]
915			parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
916			```
917		"""
918		inferred = self.infer_series(values)
919		return [self._convert(value, inferred) for value in values]
920
921
922	def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
923		"""
924			Parse a table of strings and convert them to the underlying common type of each column
925
926			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
927
928			Note that the type to which the values should be converted is determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.
929
930			This is a function that computes the entire table and returns it all at once. The generator `iterate_table()` behaves analogously, except that it computes and yields each row one at a time.
931
932			Parameters
933			----------
934			`rows`
935			: table of strings to be parsed, in row-major order
936
937			`iterator`
938			: whether the parsed values should be yielded as an iterator. If False, which is the default, the entire table is computed and returned as a list of lists. If True, this function behaves as a generator, and the rows of the table are computed and yielded one at a time. However, note that even when set to True, the type inference requires that inferred type of each individual value must all be able to fit into memory at once.
939
940			Returns
941			-------
942			converted table of values, in row-major order
943
944			Examples
945			--------
946			```python
947			parser = TypeParser()
948			table = parser.parse_table([
949				["1", "5",   "true",  "1"],
950				["2", "6.7", "false", "2.3"],
951				["3", "8.0", "",      "abc"],
952			]):
953			assert table == [
954				[1, 5.,  True,  "1"],
955				[2, 6.7, False, "2.3"],
956				[3, 8.,  None,  "abc"],
957			]
958			```
959		"""
960		return [converted_row for converted_row in self.iterate_table(rows)]
961
962
963	def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
964		"""
965			Parse a table of strings for the underlying common type of each column, then convert and yield each row
966
967			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
968
969			This is a generator that computes and yields each row one at a time. The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type to which the value sshould be converted is still determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.
970
971			Parameters
972			----------
973			`rows`
974			: table of strings to be parsed, in row-major order
975
976			Yields
977			-------
978			each row of converted table values
979
980			Examples
981			--------
982			```python
983			parser = TypeParser()
984			table = parser.iterate_table([
985				["1",   "true",  "1"],
986				["2",   "false", "2.3"],
987				["3.4", "2",     "abc"],
988			]):
989			assert next(table) == [1.,  1, "1"]
990			assert next(table) == [2.,  0, "2.3"]
991			assert next(table) == [3.4, 2, "abc"]
992			```
993		"""
994		inferred_types = self.infer_table(rows)
995
996		for row in rows:
997			yield [self._convert(value, inferred) for value, inferred in zip(row, inferred_types)]

A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.

Instances of this class can be configured with different settings for the parser and inferrer. See the constructor for more details about the available options.

TypeParser( *, trim: bool = True, use_decimal: bool = False, list_delimiter: Optional[str] = None, none_values: Iterable[str] = [''], none_case_sensitive: bool = False, true_values: Iterable[str] = ['true'], false_values: Iterable[str] = ['false'], bool_case_sensitive: bool = False, int_case_sensitive: bool = False, inf_values: Iterable[str] = [], nan_values: Iterable[str] = [], float_case_sensitive: bool = False, case_sensitive: Optional[bool] = None)
	def __init__(self,
		*,
		trim: bool=True,
		use_decimal: bool=False,
		list_delimiter: Optional[str]=None,
		none_values: Iterable[str]=[""],
		none_case_sensitive: bool=False,
		true_values: Iterable[str]=["true"],
		false_values: Iterable[str]=["false"],
		bool_case_sensitive: bool=False,
		int_case_sensitive: bool=False,
		inf_values: Iterable[str]=[],
		nan_values: Iterable[str]=[],
		float_case_sensitive: bool=False,
		case_sensitive: Optional[bool]=None,
	):
		"""
			Initialise a new parser

			Parameters
			----------
			`trim`
			: whether leading and trailing whitespace should be stripped from strings

			`use_decimal`
			: whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer type (`infer()` and `infer_*()`), and does not affect methods where the type is explicitly specified (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`).

			`list_delimiter`
			: the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead.

			`none_values`
			: list of strings that represent the value None

			`none_case_sensitive`
			: whether matches against `none_values` should be made in a case-sensitive manner

			`true_values`
			: list of strings that represent the bool value True

			`false_values`
			: list of strings that represent the bool value False

			`bool_case_sensitive`
			: whether matches against `true_values` and `false_values` should be made in a case-sensitive manner

			`int_case_sensitive`
			: whether checks for int should be done in a case-sensitive manner. This usually only applies to values given in scientific notation, where the mantissa and exponent usually are separated by `e`.

			`inf_values`
			: list of strings that represent the float or Decimal value of infinity. Each of the strings can be prepended with a negative sign to represent negative infinity also.

			`nan_values`
			: list of strings that represent a float or Decimal that is NaN (not a number)

			`float_case_sensitive`
			: whether checks for float should be done in a case-sensitive manner. This applies to matches against `inf_values` and `nan_values`, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.

			`case_sensitive`
			: whether all matches should be made in a case-sensitive manner. Sets all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive`, `float_case_sensitive` to the same value, ignoring any individual settings.

			Raises
			------
			`ValueError` if any of the options would lead to ambiguities during parsing
		"""

		# The blanket setting, when given, overrides every individual case-sensitivity flag
		if case_sensitive is not None:
			none_case_sensitive = case_sensitive
			int_case_sensitive = case_sensitive
			bool_case_sensitive = case_sensitive
			float_case_sensitive = case_sensitive

		# Special values are stored in the same normalised form (trimmed, and lower-cased when matching is case-insensitive) that incoming strings are reduced to before lookup
		self.trim = trim
		if self.trim:
			none_values = (value.strip() for value in none_values)
			true_values = (value.strip() for value in true_values)
			false_values = (value.strip() for value in false_values)
			inf_values = (value.strip() for value in inf_values)
			nan_values = (value.strip() for value in nan_values)

		self.use_decimal = use_decimal
		self.list_delimiter = list_delimiter

		self.none_case_sensitive = none_case_sensitive
		if not self.none_case_sensitive:
			none_values = (value.lower() for value in none_values)
		self.none_values = set(none_values)

		self.bool_case_sensitive = bool_case_sensitive
		if not self.bool_case_sensitive:
			true_values = (value.lower() for value in true_values)
			false_values = (value.lower() for value in false_values)
		self.true_values = set(true_values)
		self.false_values = set(false_values)

		self.int_case_sensitive = int_case_sensitive

		self.float_case_sensitive = float_case_sensitive
		if not self.float_case_sensitive:
			inf_values = (value.lower() for value in inf_values)
			nan_values = (value.lower() for value in nan_values)
		self.inf_values = set(inf_values)
		self.nan_values = set(nan_values)

		# Unconfigurable default values
		self._negative_char = "-"
		self._negative_chars = {self._negative_char, "−"}
		self._sign_chars = self._negative_chars | {"+"}
		self._digit_chars = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}  # Because isdigit("²") == True, but int("²") is invalid
		self._digit_separators = {"_"}
		self._scientific_char = "e"
		self._float_separator = "."
		self._reserved_chars = self._sign_chars | self._digit_chars | self._digit_separators | {self._scientific_char} | {self._float_separator}

		# Check if any special values conflict: each configured string must be recognised as exactly the kind of value it was configured for, and as nothing else
		for name, special_values in [
			(_SpecialValue.LIST, [self.list_delimiter] if self.list_delimiter is not None else []),
			(_SpecialValue.NONE, self.none_values),
			(_SpecialValue.TRUE, self.true_values),
			(_SpecialValue.FALSE, self.false_values),
			(_SpecialValue.INF, self.inf_values),
			(_SpecialValue.NAN, self.nan_values),
		]:
			for special_value in special_values:
				if special_value in self._reserved_chars:
					raise ValueError(f"cannot use reserved char as {name.value}: {special_value}")

				if name != _SpecialValue.NONE and self.is_none(special_value):
					raise ValueError(f"cannot use None value as {name.value}: {special_value}")

				# A true value must parse as True and a false value as False (this catches a string listed under both); any other kind of special value must not look like a bool at all
				if (
					(name == _SpecialValue.TRUE and self.parse_bool(special_value) != True) or
					(name == _SpecialValue.FALSE and self.parse_bool(special_value) != False) or
					(name != _SpecialValue.TRUE and name != _SpecialValue.FALSE and self.is_bool(special_value))
				):
					raise ValueError(f"cannot use bool value as {name.value}: {special_value}")

				if self.is_int(special_value):
					raise ValueError(f"cannot use int value as {name.value}: {special_value}")

				if self.use_decimal:
					if (
						(name == _SpecialValue.INF and self.parse_decimal(special_value) != Decimal(math.inf)) or
						(name == _SpecialValue.NAN and not self.parse_decimal(special_value).is_nan()) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use Decimal value as {name.value}: {special_value}")
				else:
					# NaN is detected with math.isnan() rather than an identity comparison, which only holds when the very same float object is returned
					if (
						(name == _SpecialValue.INF and self.parse_float(special_value) != math.inf) or
						(name == _SpecialValue.NAN and not math.isnan(self.parse_float(special_value))) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use float value as {name.value}: {special_value}")

Initialise a new parser

Parameters

trim : whether leading and trailing whitespace should be stripped from strings

use_decimal : whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer type (infer() and infer_*()), and does not affect methods where the type is explicitly specified (is_float(), is_decimal(), parse_float(), parse_decimal()).

list_delimiter : the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead.

none_values : list of strings that represent the value None

none_case_sensitive : whether matches against none_values should be made in a case-sensitive manner

true_values : list of strings that represent the bool value True

false_values : list of strings that represent the bool value False

bool_case_sensitive : whether matches against true_values and false_values should be made in a case-sensitive manner

int_case_sensitive : whether checks for int should be done in a case-sensitive manner. This usually only applies to values given in scientific notation, where the mantissa and exponent usually are separated by e.

inf_values : list of strings that represent the float or Decimal value of infinity. Each of the strings can be prepended with a negative sign to represent negative infinity also.

nan_values : list of strings that represent a float or Decimal that is NaN (not a number)

float_case_sensitive : whether checks for float should be done in a case-sensitive manner. This applies to matches against inf_values and nan_values, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by e.

case_sensitive : whether all matches should be made in a case-sensitive manner. Sets all of none_case_sensitive, bool_case_sensitive, int_case_sensitive, float_case_sensitive to the same value, ignoring any individual settings.

Raises

ValueError if any of the options would lead to ambiguities during parsing

def is_none(self, value: str) -> bool:
223	def is_none(self, value: str) -> bool:
224		"""
225			Check if a string represents the value None
226
227			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
228
229			Parameters
230			----------
231			`value`
232			: string to be checked
233
234			Returns
235			-------
236			whether it is None
237
238			Examples
239			--------
240			```python
241			parser = TypeParser()
242			parser.parse_bool("")     # True
243			parser.parse_bool("abc")  # False
244			```
245		"""
246		if self.trim:
247			value = value.strip()
248		if not self.bool_case_sensitive:
249			value = value.lower()
250
251		if value in self.none_values:
252			return True
253		else:
254			return False

Check if a string represents the value None

Only strings that match the values in self.none_values will be interpreted as None. The default accepted values are [""], i.e. an empty string. The case sensitivity of this matching depends on self.none_case_sensitive, which is False by default.

Parameters

value : string to be checked

Returns

whether it is None

Examples

parser = TypeParser()
parser.is_none("")     # True
parser.is_none("abc")  # False
def is_bool(self, value: str) -> bool:
257	def is_bool(self, value: str) -> bool:
258		"""
259			Check if a string represents a bool
260
261			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
262
263			Parameters
264			----------
265			`value`
266			: string to be checked
267
268			Returns
269			-------
270			whether it is a bool
271
272			Examples
273			--------
274			```python
275			parser = TypeParser()
276			parser.is_bool("true")  # True
277			parser.is_bool("")      # True
278			parser.is_bool("abc")   # False
279			```
280		"""
281		if self.trim:
282			value = value.strip()
283
284		if not self.bool_case_sensitive:
285			value = value.lower()
286		if value in self.true_values:
287			return True
288		if value in self.false_values:
289			return True
290
291		return False

Check if a string represents a bool

Only strings that match the values in self.true_values and self.false_values will be interpreted as booleans. The default accepted values are ["true"] and ["false"] respectively. The case sensitivity of this matching depends on self.bool_case_sensitive, which is False by default.

Parameters

value : string to be checked

Returns

whether it is a bool

Examples

parser = TypeParser()
parser.is_bool("true")  # True
parser.is_bool("")      # False
parser.is_bool("abc")   # False
def is_int( self, value: str, *, allow_sign: bool = True, allow_negative: bool = True, allow_scientific: bool = True) -> bool:
294	def is_int(self, value: str, *, allow_sign: bool=True, allow_negative: bool=True, allow_scientific: bool=True) -> bool:
295		"""
296			Check if a string represents an int
297
298			Parameters
299			----------
300			`value`
301			: string to be checked
302
303			`allow_negative`
304			: whether to accept negative values
305
306			`allow_sign`
307			: whether to accept signed values. If False, it implies that `allow_negative` is False also.
308
309			`allow_scientific`
310			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
311
312			Returns
313			-------
314			whether it is an int
315
316			Examples
317			--------
318			```python
319			parser = TypeParser()
320			parser.is_int("0")    # True
321			parser.is_int("-1")   # True
322			parser.is_int("abc")  # False
323			parser.is_int("")     # False
324			```
325		"""
326		if self.trim:
327			value = value.strip()
328
329		if len(value) == 0:
330			return False
331
332		if allow_scientific:
333			value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
334			if exp is not None:
335				return self.is_int(
336					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
337				) and self.is_int(
338					exp, allow_sign=True, allow_negative=False, allow_scientific=False
339				)
340
341		if value[0] in self._sign_chars:
342			if len(value) == 1:
343				return False
344			if not allow_sign:
345				return False
346			if not allow_negative and value[0] in self._negative_chars:
347				return False
348			value = value[1:]
349		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
350			return False
351
352		prev_separated = False
353		for c in value:
354			if c in self._digit_separators:
355				if prev_separated:
356					return False
357				prev_separated = True
358			else:
359				prev_separated = False
360				if c not in self._digit_chars:
361					return False
362		return True

Check if a string represents an int

Parameters

value : string to be checked

allow_negative : whether to accept negative values

allow_sign : whether to accept signed values. If False, it implies that allow_negative is False also.

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note M must be an integer and X must be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.

Returns

whether it is an int

Examples

parser = TypeParser()
parser.is_int("0")    # True
parser.is_int("-1")   # True
parser.is_int("abc")  # False
parser.is_int("")     # False
def is_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool:
365	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
366		"""
367			Check if a string represents a float (or equivalently, a Decimal)
368
369			This function will also return True if the string represents an int.
370
371			Alias: `is_decimal()`
372
373			Parameters
374			----------
375			`value`
376			: string to be checked
377
378			`allow_scientific`
379			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
380
381			`allow_inf`
382			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
383
384			`allow_nan`
385			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
386
387			Returns
388			-------
389			whether it is a float or Decimal
390
391			Examples
392			--------
393			```python
394			parser = TypeParser()
395			parser.is_float("1.")       # True
396			parser.is_float("12.3e-2")  # True
397			parser.is_float("abc")      # False
398			parser.is_float("")         # False
399			```
400		"""
401		if self.trim:
402			value = value.strip()
403
404		if len(value) > 0 and value[0] in self._sign_chars:
405			value = value[1:]
406
407		if self.float_case_sensitive:
408			special_value = value
409		else:
410			special_value = value.lower()
411		if allow_inf and special_value in self.inf_values:
412			return True
413		if allow_nan and special_value in self.nan_values:
414			return True
415
416		if len(value) == 0:
417			return False
418
419		if allow_scientific:
420			value, exp = _decompose_string_pair(value, self._scientific_char, self.float_case_sensitive)
421			if exp is not None:
422				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)
423
424		value, frac = _decompose_string_pair(value, self._float_separator, self.float_case_sensitive)
425		if frac is not None:
426			if value == "" and frac == "":
427				return False
428			return (
429				self.is_int(value, allow_sign=True, allow_negative=False, allow_scientific=False) or value == ""
430			) and (
431				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
432			)
433
434		return self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=False)

Check if a string represents a float (or equivalently, a Decimal)

This function will also return True if the string represents an int.

Alias: is_decimal()

Parameters

value : string to be checked

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

whether it is a float or Decimal

Examples

parser = TypeParser()
parser.is_float("1.")       # True
parser.is_float("12.3e-2")  # True
parser.is_float("abc")      # False
parser.is_float("")         # False
def is_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool:
437	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
438		"""
439			Alias of `is_float()`
440		"""
441		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)

Alias of is_float()

def parse_none(self, value: str) -> None:
444	def parse_none(self, value: str) -> None:
445		"""
446			Parse a string and return it as the value None if possible
447
448			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
449
450			Parameters
451			----------
452			`value`
453			: string to be parsed
454
455			Returns
456			-------
457			parsed None value
458
459			Raises
460			------
461			`ValueError` if `value` cannot be parsed
462
463			Examples
464			--------
465			```python
466			parser = TypeParser()
467			parser.parse_bool("")     # None
468			parser.parse_bool("abc")  # raises ValueError
469			```
470		"""
471		if self.is_none(value):
472			return None
473		else:
474			raise ValueError(f"not a none value: {value}")

Parse a string and return it as the value None if possible

Only strings that match the values in self.none_values will be interpreted as None. The default accepted values are [""], i.e. an empty string. The case sensitivity of this matching depends on self.none_case_sensitive, which is False by default.

Parameters

value : string to be parsed

Returns

parsed None value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_none("")     # None
parser.parse_none("abc")  # raises ValueError
def parse_bool(self, value: str) -> bool:
477	def parse_bool(self, value: str) -> bool:
478		"""
479			Parse a string and return it as a bool if possible
480
481			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
482
483			Parameters
484			----------
485			`value`
486			: string to be parsed
487
488			Returns
489			-------
490			parsed bool value
491
492			Raises
493			------
494			`ValueError` if `value` cannot be parsed
495
496			Examples
497			--------
498			```python
499			parser = TypeParser()
500			parser.parse_bool("true")   # True
501			parser.parse_bool("FALSE")  # False
502			```
503		"""
504		if self.trim:
505			value = value.strip()
506
507		if self.bool_case_sensitive:
508			special_value = value
509		else:
510			special_value = value.lower()
511		if special_value in self.true_values:
512			return True
513		if special_value in self.false_values:
514			return False
515
516		raise ValueError(f"not a boolean: {value}")

Parse a string and return it as a bool if possible

Only strings that match the values in self.true_values and self.false_values will be interpreted as booleans. The default accepted values are ["true"] and ["false"] respectively. The case sensitivity of this matching depends on self.bool_case_sensitive, which is False by default.

Parameters

value : string to be parsed

Returns

parsed bool value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_bool("true")   # True
parser.parse_bool("FALSE")  # False
def parse_int(self, value: str, *, allow_scientific: bool = True) -> int:
519	def parse_int(self, value: str, *, allow_scientific: bool=True) -> int:
520		"""
521			Parse a string and return it as an int if possible
522
523			If the string represents a bool, it will be converted to `1` for True and `0` for False.
524
525			Parameters
526			----------
527			`value`
528			: string to be parsed
529
530			`allow_scientific`
531			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
532
533			Returns
534			-------
535			parsed int value
536
537			Raises
538			------
539			`ValueError` if `value` cannot be parsed
540
541			Examples
542			--------
543			```python
544			parser = TypeParser()
545			parser.parse_int("0")    # 0
546			parser.parse_int("-1")   # -1
547			parser.parse_int("2e3")  # 2000
548			```
549		"""
550		if self.trim:
551			value = value.strip()
552
553		if self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=allow_scientific):
554			if allow_scientific:
555				value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
556				if exp is not None:
557					if value[0] in (self._negative_chars - {self._negative_char}):
558						value = self._negative_char + value[1:]
559					return int(value) * (10 ** int(exp))
560
561			if value[0] in (self._negative_chars - {self._negative_char}):
562				value = self._negative_char + value[1:]
563			return int(value)
564
565		elif self.is_bool(value):
566			return int(self.parse_bool(value))
567		else:
568			raise ValueError(f"not an integer: {value}")

Parse a string and return it as an int if possible

If the string represents a bool, it will be converted to 1 for True and 0 for False.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note M must be an integer and X must be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.

Returns

parsed int value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_int("0")    # 0
parser.parse_int("-1")   # -1
parser.parse_int("2e3")  # 2000
def parse_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> float:
def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
	"""
		Convert a string to a (non-exact) float, if it can be read as one

		A string representing a bool becomes `1.` for True and `0.` for False. A string representing an int is converted to a float as well.

		This is the float counterpart of `parse_decimal()`, which returns an exact Decimal instead.

		Parameters
		----------
		`value`
		: string to be parsed

		`allow_scientific`
		: whether to accept scientific notation. If True, strings of the form `"MeX"` will be interpreted as the expression `M * (10 ** X)`, where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

		`allow_inf`
		: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.

		`allow_nan`
		: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.

		Returns
		-------
		parsed float value

		Raises
		------
		`ValueError` if `value` cannot be parsed

		Examples
		--------
		```python
		parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
		parser.parse_float("1.")       # 1.
		parser.parse_float("1.23e2")   # 123.
		parser.parse_float("1.23e-2")  # 0.0123
		parser.parse_float("inf")      # math.inf
		```
	"""
	# The shared float-like parser does the work; only the target type and its
	# infinity/NaN constants are specific to float.
	options = {
		"allow_scientific": allow_scientific,
		"allow_inf": allow_inf,
		"allow_nan": allow_nan,
	}
	return self._parse_floatlike(value, float, math.inf, math.nan, **options)

Parse a string and return it as a (non-exact) float if possible

If the string represents a bool, it will be converted to 1. for True and 0. for False. If the string represents an int, it will be converted to a float also.

Behaves analogously to parse_decimal(), except that that returns an exact Decimal instead.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

parsed float value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_float("1.")       # 1.
parser.parse_float("1.23e2")   # 123.
parser.parse_float("1.23e-2")  # 0.0123
parser.parse_float("inf")      # math.inf
def parse_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> decimal.Decimal:
def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
	"""
		Convert a string to an exact Decimal, if it can be read as one

		A string representing a bool becomes `Decimal(1)` for True and `Decimal(0)` for False. A string representing an int is converted to a Decimal as well.

		This is the Decimal counterpart of `parse_float()`, which returns a non-exact float instead.

		Parameters
		----------
		`value`
		: string to be parsed

		`allow_scientific`
		: whether to accept scientific notation. If True, strings of the form `"MeX"` will be interpreted as the expression `M * (10 ** X)`, where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

		`allow_inf`
		: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.

		`allow_nan`
		: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.

		Returns
		-------
		parsed Decimal value

		Raises
		------
		`ValueError` if `value` cannot be parsed

		Examples
		--------
		```python
		parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
		parser.parse_decimal("1.")       # Decimal(1)
		parser.parse_decimal("1.23e2")   # Decimal(123)
		parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
		parser.parse_decimal("inf")      # Decimal(math.inf)
		```
	"""
	# The shared float-like parser does the work; only the target type and its
	# infinity/NaN constants are specific to Decimal.
	options = {
		"allow_scientific": allow_scientific,
		"allow_inf": allow_inf,
		"allow_nan": allow_nan,
	}
	return self._parse_floatlike(value, Decimal, Decimal(math.inf), Decimal(math.nan), **options)

Parse a string and return it as an exact Decimal if possible

If the string represents a bool, it will be converted to Decimal(1) for True and Decimal(0) for False. If the string represents an int, it will be converted to a Decimal also.

Behaves analogously to parse_float(), except that that returns a non-exact float instead.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

parsed Decimal value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_decimal("1.")       # Decimal(1)
parser.parse_decimal("1.23e2")   # Decimal(123)
parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
parser.parse_decimal("inf")      # Decimal(math.inf)
def infer( self, value: str) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]:
def infer(self, value: str) -> AnyValueType:
	"""
		Work out which underlying type a string represents

		The scalar checks are tried in a fixed order (none, bool, int, float) and the first match wins. If none of them match and `self.list_delimiter` is not None, the string is also checked for an inline list.

		Parameters
		----------
		`value`
		: the string for which the type should be inferred

		Returns
		-------
		inferred type

		Examples
		--------
		```python
		parser = TypeParser()
		parser.infer("true")  # bool
		parser.infer("2.0")   # float
		parser.infer("abc")   # str
		```
	"""
	if self.is_none(value):
		return NoneType
	if self.is_bool(value):
		return bool
	if self.is_int(value):
		return int
	if self.is_float(value):
		return Decimal if self.use_decimal else float

	text = value.strip() if self.trim else value

	# Not a scalar: unless the delimiter is configured and present, there is
	# nothing more specific to report than the generic type.
	delimiter = self.list_delimiter
	if delimiter is None or delimiter not in text:
		return GenericValue

	items = text.split(delimiter)
	if self.trim:
		items = [item.strip() for item in items]

	# The element type of the list is the common type of all of its items.
	element_type = reduce_types(self.infer(item) for item in items)
	return list[element_type]

Infer the underlying type of a string

Also check for inline lists if self.list_delimiter is not None.

Parameters

value : the string for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer("true")  # bool
parser.infer("2.0")   # float
parser.infer("abc")   # str
def infer_series( self, values: Iterable[str]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]:
def infer_series(self, values: Iterable[str]) -> AnyValueType:
	"""
		Work out the common underlying type of a series of strings

		Each string is inferred individually with `infer()`. If the values do not all have the same apparent type, the result is the narrowest possible type that encompasses every value in the series. See `parsetypes.reduce_types()` for more information.

		Parameters
		----------
		`values`
		: series of strings for which the type should be inferred

		Returns
		-------
		inferred type

		Examples
		--------
		```python
		parser = TypeParser()
		parser.infer_series(["1", "2", "3.4"])       # float
		parser.infer_series(["true", "false", "2"])  # int
		parser.infer_series(["1", "2.3", "abc"])     # str
		```
	"""
	# Kept lazy so that reduce_types() only pulls as many values as it needs.
	individual_types = map(self.infer, values)
	return reduce_types(individual_types)

Infer the underlying common type of a series of strings

If the values in the series do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Parameters

values : series of strings for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer_series(["1", "2", "3.4"])       # float
parser.infer_series(["true", "false", "2"])  # int
parser.infer_series(["1", "2.3", "abc"])     # str
def infer_table( self, rows: Iterable[Sequence[str]]) -> list[typing.Type[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]]:
def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
	"""
		Work out the common underlying type of each column in a table of strings

		For each column, if the values do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.

		Note that the individual inferred types of every value in the table must be able to fit into memory.

		Parameters
		----------
		`rows`
		: table of strings for which the types should be inferred, in row-major order

		Returns
		-------
		inferred types, one per column. The list is empty if the table has no rows or if its first row has no columns.

		Examples
		--------
		```python
		parser = TypeParser()
		parser.infer_table([
			["1",   "true",  "1"],
			["2",   "false", "2.3"],
			["3.4", "2",     "abc"],
		])
		# [float, int, str]
		```
	"""
	remaining_rows = iter(rows)
	header_row = next(remaining_rows, None)

	# No rows at all, or a first row without columns: nothing to infer.
	if header_row is None or len(header_row) == 0:
		return []

	# Seed one single-element column per value of the first row, then append
	# the inferred types of every later row.
	type_table = _TypeTable([[self.infer(cell)] for cell in header_row])
	for later_row in remaining_rows:
		type_table.add_row([self.infer(cell) for cell in later_row])

	return list(map(reduce_types, type_table.cols))

Infer the underlying common type for each column of a table of strings

For each column, if the values do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the individual inferred types of every value in the table must be able to fit into memory.

Parameters

rows : table of strings for which the types should be inferred, in row-major order

Returns

inferred types

Examples

parser = TypeParser()
parser.infer_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
# [float, int, str]
def parse( self, value: str) -> Union[str, int, float, decimal.Decimal, bool, NoneType, list]:
def parse(self, value: str) -> AnyValue:
	"""
		Infer the underlying type of a string and convert the string to it

		Parameters
		----------
		`value`
		: the string to be parsed

		Returns
		-------
		converted value

		Examples
		--------
		```python
		parser = TypeParser()
		parser.parse("true")  # True
		parser.parse("2.0")   # 2.
		parser.parse("abc")   # "abc"
		```
	"""
	target_type = self.infer(value)
	return self._convert(value, target_type)

Parse a string and convert it to its underlying type

Parameters

value : the string to be parsed

Returns

converted value

Examples

parser = TypeParser()
parser.parse("true")  # True
parser.parse("2.0")   # 2.
parser.parse("abc")   # "abc"
def parse_series( self, values: Iterable[str]) -> list[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, list]]:
def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
	"""
		Parse a series of strings and convert them to their underlying common type

		If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.

		Any iterable is accepted, including generators and other one-shot iterators: the input is read into a list once, so that it can be traversed both for inference and for conversion.

		Parameters
		----------
		`values`
		: series of strings to be parsed

		Returns
		-------
		converted values

		Examples
		--------
		```python
		parser = TypeParser()
		parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
		parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
		parser.parse_series(["true", "false", ""])  # [True, False, None]
		parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
		```
	"""
	# The series is walked twice (inference, then conversion). A one-shot
	# iterator would be exhausted by the first pass and silently give an
	# empty result, so materialise it up front.
	values = list(values)
	inferred = self.infer_series(values)
	return [self._convert(value, inferred) for value in values]

Parse a series of strings and convert them to their underlying common type

If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Parameters

values : series of strings to be parsed

Returns

converted values

Examples

parser = TypeParser()
parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
parser.parse_series(["true", "false", ""])  # [True, False, None]
parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
def parse_table( self, rows: Iterable[Sequence[str]]) -> list[list[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, list]]]:
def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
	"""
		Parse a table of strings and convert them to the underlying common type of each column

		For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.

		Note that the type to which the values should be converted is determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.

		This is a function that computes the entire table and returns it all at once. The generator `iterate_table()` behaves analogously, except that it computes and yields each row one at a time.

		Parameters
		----------
		`rows`
		: table of strings to be parsed, in row-major order

		Returns
		-------
		converted table of values, in row-major order

		Examples
		--------
		```python
		parser = TypeParser()
		table = parser.parse_table([
			["1", "5",   "true",  "1"],
			["2", "6.7", "false", "2.3"],
			["3", "8.0", "",      "abc"],
		])
		assert table == [
			[1, 5.,  True,  "1"],
			[2, 6.7, False, "2.3"],
			[3, 8.,  None,  "abc"],
		]
		```
	"""
	# Drain the row generator into a list so the whole table is returned at once.
	return list(self.iterate_table(rows))

Parse a table of strings and convert them to the underlying common type of each column

For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the type to which the values should be converted is determined by infer_table(), and so the individual inferred types of every value in the table must be able to fit into memory.

This is a function that computes the entire table and returns it all at once. The generator iterate_table() behaves analogously, except that it computes and yields each row one at a time.

Parameters

rows : table of strings to be parsed, in row-major order

iterator : whether the parsed values should be yielded as an iterator. If False, which is the default, the entire table is computed and returned as a list of lists. If True, this function behaves as a generator, and the rows of the table are computed and yielded one at a time. However, note that even when set to True, the type inference requires that inferred type of each individual value must all be able to fit into memory at once.

Returns

converted table of values, in row-major order

Examples

parser = TypeParser()
table = parser.parse_table([
	["1", "5",   "true",  "1"],
	["2", "6.7", "false", "2.3"],
	["3", "8.0", "",      "abc"],
])
assert table == [
	[1, 5.,  True,  "1"],
	[2, 6.7, False, "2.3"],
	[3, 8.,  None,  "abc"],
]
def iterate_table( self, rows: Iterable[Sequence[str]]) -> Iterator[list[Union[str, int, float, decimal.Decimal, bool, NoneType, list]]]:
def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
	"""
		Parse a table of strings for the underlying common type of each column, then convert and yield each row

		For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.

		This is a generator that computes and yields each row one at a time. The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type to which the values should be converted is still determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.

		The table is traversed twice: once to infer the column types and once to convert the rows. If `rows` is a one-shot iterator (such as a generator), it is read into a list first so that the second traversal still sees every row; in that case the rows themselves must fit into memory too. Inputs that can be iterated repeatedly are used as they are.

		Parameters
		----------
		`rows`
		: table of strings to be parsed, in row-major order

		Yields
		------
		each row of converted table values

		Examples
		--------
		```python
		parser = TypeParser()
		table = parser.iterate_table([
			["1",   "true",  "1"],
			["2",   "false", "2.3"],
			["3.4", "2",     "abc"],
		])
		assert next(table) == [1.,  1, "1"]
		assert next(table) == [2.,  0, "2.3"]
		assert next(table) == [3.4, 2, "abc"]
		```
	"""
	# An iterator returns itself from iter(). Such an input would be exhausted
	# by infer_table() below, leaving no rows to convert, so buffer it.
	if iter(rows) is rows:
		rows = list(rows)

	inferred_types = self.infer_table(rows)

	for row in rows:
		yield [self._convert(value, inferred) for value, inferred in zip(row, inferred_types)]

Parse a table of strings for the underlying common type of each column, then convert and yield each row

For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

This is a generator that computes and yields each row one at a time. The function parse_table() behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type to which the values should be converted is still determined by infer_table(), and so the individual inferred types of every value in the table must be able to fit into memory.

Parameters

rows : table of strings to be parsed, in row-major order

Yields

each row of converted table values

Examples

parser = TypeParser()
table = parser.iterate_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
assert next(table) == [1.,  1, "1"]
assert next(table) == [2.,  0, "2.3"]
assert next(table) == [3.4, 2, "abc"]
Inherited Members
builtins.object
__new__
__repr__
__hash__
__str__
__getattribute__
__setattr__
__delattr__
__lt__
__le__
__eq__
__ne__
__gt__
__ge__
__reduce_ex__
__reduce__
__getstate__
__subclasshook__
__init_subclass__
__format__
__sizeof__
__dir__
def reduce_types( types: Iterable[Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]:
def reduce_types(types: Iterable[AnyValueType]) -> AnyValueType:
	"""
		Combine several types into one common type.

		If the input types are not all the same, the resulting type will be the narrowest possible type that will encompass all of the input types.

		This operation is useful in cases such as parsing a CSV file where each column should have a consistent type, but where the individual values in a column could be interpreted variously as ints or floats (or other types).

		Parameters
		----------
		`types`
		: types to be reduced

		Returns
		-------
		common reduced type, or `GenericValue` if `types` is empty

		Examples
		--------
		```python
		reduce_types([int, float])        # float
		reduce_types([bool, int])         # int
		reduce_types([int, float, str])   # str
		```
	"""
	common: Union[AnyValueType, None] = None
	for candidate in types:
		if common is None:
			# First type seen: it is the running result as it stands.
			common = candidate
		elif candidate != common:
			common = _merge_types(common, candidate)

		# Stop consuming the input as soon as the terminal type is reached.
		if common == _TerminalValue:
			return _TerminalValue

	# `common` is still None only when `types` was empty.
	return GenericValue if common is None else common

Reduce multiple types into a single common type.

If the input types are not all the same, the resulting type will be the narrowest possible type that will encompass all of the input types.

This operation is useful in cases such as parsing a CSV file where each column should have a consistent type, but where the individual values in a column could be interpreted variously as ints or floats (or other types).

Parameters

types : types to be reduced

Returns

common reduced type

Examples

reduce_types([int, float])        # float
reduce_types([bool, int])         # int
reduce_types([int, float, str])   # str