Edit on GitHub

parsetypes

This package provides tools for parsing serialised data to recover their original underlying types.

The TypeParser class provides configurable type inference and parsing. This can be initialised with different settings to, for example:

  • treat inf as either a float or a normal string
  • give exact Decimal values instead of floats
  • detect inline lists
 1"""
 2	This package provides tools for parsing serialised data to recover their original underlying types.
 3
 4	The `TypeParser` class provides configurable type inference and parsing. This can be initialised with different settings to, for example:
 5	- treat `inf` as either a float or a normal string
 6	- give exact Decimal values instead of floats
 7	- detect inline lists
 8"""
 9
10
11__version__ = "0.2.5"
12
13from ._common import AnyScalar, AnyScalarType, AnyValue, AnyValueType, GenericValue, Nullable
14from ._parser import TypeParser
15from ._reduce_types import reduce_types
16
17__all__ = ('TypeParser', 'reduce_types')
class TypeParser:
  60class TypeParser:
  61	"""
  62		A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.
  63
  64		Instances of this class can be configured with different settings for the parser and inferrer. See the constructor for more details about the available options.
  65	"""
  66
  67	def __init__(self,
  68	    *,
  69		trim: bool=True,
  70		use_decimal: bool=False,
  71		list_delimiter: Optional[str]=None,
  72		none_values: Iterable[str]=[""],
  73		none_case_sensitive: bool=False,
  74		true_values: Iterable[str]=["true"],
  75		false_values: Iterable[str]=["false"],
  76		bool_case_sensitive: bool=False,
  77		int_case_sensitive: bool=False,
  78		inf_values: Iterable[str]=[],
  79		nan_values: Iterable[str]=[],
  80		float_case_sensitive: bool=False,
  81		case_sensitive: Optional[bool]=None,
  82	):
  83		"""
  84			Initialise a new parser
  85
  86			Parameters
  87			----------
  88			`trim`
  89			: whether leading and trailing whitespace should be stripped from strings
  90
  91			`use_decimal`
  92			: whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer type (`infer()` and `infer_*()`), and does not affect methods where the type is explicitly specified (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`).
  93
  94			`list_delimiter`
  95			: the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead.
  96
  97			`none_values`
  98			: list of strings that represent the value None
  99
 100			`none_case_sensitive`
 101			: whether matches against `none_values` should be made in a case-sensitive manner
 102
 103			`true_values`
 104			: list of strings that represent the bool value True
 105
 106			`false_values`
 107			: list of strings that represent the bool value False
 108
 109			`bool_case_sensitive`
 110			: whether matches against `true_values` and `false_values` should be made in a case-sensitive manner
 111
 112			`int_case_sensitive`
 113			: whether checks for int should be done in a case-sensitive manner. This usually only applies to values given in scientific notation, where the mantissa and exponent usually are separated by `e`.
 114
 115			`inf_values`
 116			: list of strings that represent the float or Decimal value of infinity. Each of the strings can be prepended with a negative sign to represent negative infinity also.
 117
 118			`nan_values`
 119			: list of strings that represent a float or Decimal that is NaN (not a number)
 120
 121			`float_case_sensitive`
 122			: whether checks for float should be done in a case-sensitive manner. This applies to matches against `inf_values` and `nan_values`, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.
 123
 124			`case_sensitive`
 125			: whether all matches should be made in a case-sensitive manner. Sets all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive`, `float_case_sensitive` to the same value, ignoring any individual settings.
 126
 127			Raises
 128			------
 129			`ValueError` if any of the options would lead to ambiguities during parsing
 130		"""
 131
 132		if case_sensitive is not None:
 133			none_case_sensitive = case_sensitive
 134			int_case_sensitive = case_sensitive
 135			bool_case_sensitive = case_sensitive
 136			float_case_sensitive = case_sensitive
 137
 138		self.trim = trim
 139		if self.trim:
 140			none_values = (value.strip() for value in none_values)
 141			true_values = (value.strip() for value in true_values)
 142			false_values = (value.strip() for value in false_values)
 143			inf_values = (value.strip() for value in inf_values)
 144			nan_values = (value.strip() for value in nan_values)
 145
 146		self.use_decimal = use_decimal
 147		self.list_delimiter = list_delimiter
 148
 149		self.none_case_sensitive = none_case_sensitive
 150		if not self.none_case_sensitive:
 151			none_values = (value.lower() for value in none_values)
 152		self.none_values = set(none_values)
 153
 154		self.bool_case_sensitive = bool_case_sensitive
 155		if not self.bool_case_sensitive:
 156			true_values = (value.lower() for value in true_values)
 157			false_values = (value.lower() for value in false_values)
 158		self.true_values = set(true_values)
 159		self.false_values = set(false_values)
 160
 161		self.int_case_sensitive = int_case_sensitive
 162
 163		self.float_case_sensitive = float_case_sensitive
 164		if not self.float_case_sensitive:
 165			inf_values = (value.lower() for value in inf_values)
 166			nan_values = (value.lower() for value in nan_values)
 167		self.inf_values = set(inf_values)
 168		self.nan_values = set(nan_values)
 169
 170		# Unconfigurable default values
 171		self._negative_char = "-"
 172		self._negative_chars = {self._negative_char, "−"}
 173		self._sign_chars = self._negative_chars | {"+"}
 174		self._digit_chars = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}  # Because isdigit("²") == True, but int("²") is invalid
 175		self._digit_separators = {"_"}
 176		self._scientific_char = "e"
 177		self._float_separator = "."
 178		self._reserved_chars = self._sign_chars | self._digit_chars | self._digit_separators | {self._scientific_char} | {self._float_separator}
 179		# special_chars = self._reserved_chars | self.list_delimiter
 180
 181		# Check if any special values conflict
 182		for name, special_values in [
 183			(_SpecialValue.LIST, [self.list_delimiter] if self.list_delimiter is not None else []),
 184			(_SpecialValue.NONE, self.none_values),
 185			(_SpecialValue.TRUE, self.true_values),
 186			(_SpecialValue.FALSE, self.false_values),
 187			(_SpecialValue.INF, self.inf_values),
 188			(_SpecialValue.NAN, self.nan_values),
 189		]:
 190			for special_value in special_values:
 191				if special_value in self._reserved_chars:
 192					raise ValueError(f"cannot use reserved char as {name.value}: {special_value}")
 193
 194				if name != _SpecialValue.NONE and self.is_none(special_value):
 195					raise ValueError(f"cannot use None value as {name.value}: {special_value}")
 196
 197				if (
 198					(name == _SpecialValue.TRUE and self.parse_bool(special_value) != True) or
 199					(name == _SpecialValue.FALSE and self.parse_bool(special_value) != False) or
 200					(name != _SpecialValue.TRUE and name != _SpecialValue.FALSE and self.is_bool(special_value))
 201				):
 202					raise ValueError(f"cannot use bool value as {name.value}: {special_value}")
 203
 204				if self.is_int(special_value):
 205					raise ValueError(f"cannot use int value as {name.value}: {special_value}")
 206
 207				if self.use_decimal:
 208					if (
 209						(name == _SpecialValue.INF and self.parse_decimal(special_value) != Decimal(math.inf)) or
 210						(name == _SpecialValue.NAN and not self.parse_decimal(special_value).is_nan()) or
 211						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
 212					):
 213						raise ValueError(f"cannot use Decimal value as {name}: {special_value}")
 214				else:
 215					if (
 216						(name == _SpecialValue.INF and self.parse_float(special_value) != math.inf) or
 217						(name == _SpecialValue.NAN and self.parse_float(special_value) is not math.nan) or
 218						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
 219					):
 220						raise ValueError(f"cannot use float value as {name}: {special_value}")
 221
 222
 223	def is_none(self, value: str) -> bool:
 224		"""
 225			Check if a string represents the value None
 226
 227			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
 228
 229			Parameters
 230			----------
 231			`value`
 232			: string to be checked
 233
 234			Returns
 235			-------
 236			whether it is None
 237
 238			Examples
 239			--------
 240			```python
 241			parser = TypeParser()
 242			parser.parse_bool("")     # True
 243			parser.parse_bool("abc")  # False
 244			```
 245		"""
 246		if self.trim:
 247			value = value.strip()
 248		if not self.bool_case_sensitive:
 249			value = value.lower()
 250
 251		if value in self.none_values:
 252			return True
 253		else:
 254			return False
 255
 256
 257	def is_bool(self, value: str) -> bool:
 258		"""
 259			Check if a string represents a bool
 260
 261			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
 262
 263			Parameters
 264			----------
 265			`value`
 266			: string to be checked
 267
 268			Returns
 269			-------
 270			whether it is a bool
 271
 272			Examples
 273			--------
 274			```python
 275			parser = TypeParser()
 276			parser.is_bool("true")  # True
 277			parser.is_bool("")      # True
 278			parser.is_bool("abc")   # False
 279			```
 280		"""
 281		if self.trim:
 282			value = value.strip()
 283
 284		if not self.bool_case_sensitive:
 285			value = value.lower()
 286		if value in self.true_values:
 287			return True
 288		if value in self.false_values:
 289			return True
 290
 291		return False
 292
 293
 294	def is_int(self, value: str, *, allow_negative: bool=True, allow_sign: bool=True, allow_scientific: bool=True) -> bool:
 295		"""
 296			Check if a string represents an int
 297
 298			Parameters
 299			----------
 300			`value`
 301			: string to be checked
 302
 303			`allow_negative`
 304			: whether to accept negative values
 305
 306			`allow_sign`
 307			: whether to accept values prepended with a sign character. If False, it implies that `allow_negative` is False also.
 308
 309			`allow_scientific`
 310			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
 311
 312			Returns
 313			-------
 314			whether it is an int
 315
 316			Examples
 317			--------
 318			```python
 319			parser = TypeParser()
 320			parser.is_int("0")    # True
 321			parser.is_int("-1")   # True
 322			parser.is_int("abc")  # False
 323			parser.is_int("")     # False
 324			```
 325		"""
 326		if self.trim:
 327			value = value.strip()
 328
 329		if len(value) == 0:
 330			return False
 331
 332		if allow_scientific:
 333			value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
 334			if exp is not None:
 335				return self.is_int(
 336					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
 337				) and self.is_int(
 338					exp, allow_sign=True, allow_negative=False, allow_scientific=False
 339				)
 340
 341		if value[0] in self._sign_chars:
 342			if len(value) == 1:
 343				return False
 344			if not allow_sign:
 345				return False
 346			if not allow_negative and value[0] in self._negative_chars:
 347				return False
 348			value = value[1:]
 349		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
 350			return False
 351
 352		prev_separated = False
 353		for c in value:
 354			if c in self._digit_separators:
 355				if prev_separated:
 356					return False
 357				prev_separated = True
 358			else:
 359				prev_separated = False
 360				if c not in self._digit_chars:
 361					return False
 362		return True
 363
 364
 365	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
 366		"""
 367			Check if a string represents a float (or equivalently, a Decimal)
 368
 369			This function will also return True if the string represents an int.
 370
 371			Alias: `is_decimal()`
 372
 373			Parameters
 374			----------
 375			`value`
 376			: string to be checked
 377
 378			`allow_scientific`
 379			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
 380
 381			`allow_inf`
 382			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
 383
 384			`allow_nan`
 385			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
 386
 387			Returns
 388			-------
 389			whether it is a float or Decimal
 390
 391			Examples
 392			--------
 393			```python
 394			parser = TypeParser()
 395			parser.is_float("1.")       # True
 396			parser.is_float("12.3e-2")  # True
 397			parser.is_float("abc")      # False
 398			parser.is_float("")         # False
 399			```
 400		"""
 401		if self.trim:
 402			value = value.strip()
 403
 404		if len(value) > 0 and value[0] in self._sign_chars:
 405			value = value[1:]
 406
 407		if self.float_case_sensitive:
 408			special_value = value
 409		else:
 410			special_value = value.lower()
 411		if allow_inf and special_value in self.inf_values:
 412			return True
 413		if allow_nan and special_value in self.nan_values:
 414			return True
 415
 416		if len(value) == 0:
 417			return False
 418
 419		if allow_scientific:
 420			value, exp = _decompose_string_pair(value, self._scientific_char, self.float_case_sensitive)
 421			if exp is not None:
 422				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)
 423
 424		value, frac = _decompose_string_pair(value, self._float_separator, self.float_case_sensitive)
 425		if frac is not None:
 426			if value == "" and frac == "":
 427				return False
 428			return (
 429				self.is_int(value, allow_sign=True, allow_negative=False, allow_scientific=False) or value == ""
 430			) and (
 431				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
 432			)
 433
 434		return self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=False)
 435
 436
 437	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
 438		"""
 439			Alias of `is_float()`
 440		"""
 441		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)
 442
 443
 444	def parse_none(self, value: str) -> None:
 445		"""
 446			Parse a string and return it as the value None if possible
 447
 448			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
 449
 450			Parameters
 451			----------
 452			`value`
 453			: string to be parsed
 454
 455			Returns
 456			-------
 457			parsed None value
 458
 459			Raises
 460			------
 461			`ValueError` if `value` cannot be parsed
 462
 463			Examples
 464			--------
 465			```python
 466			parser = TypeParser()
 467			parser.parse_bool("")     # None
 468			parser.parse_bool("abc")  # raises ValueError
 469			```
 470		"""
 471		if self.is_none(value):
 472			return None
 473		else:
 474			raise ValueError(f"not a none value: {value}")
 475
 476
 477	def parse_bool(self, value: str) -> bool:
 478		"""
 479			Parse a string and return it as a bool if possible
 480
 481			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
 482
 483			Parameters
 484			----------
 485			`value`
 486			: string to be parsed
 487
 488			Returns
 489			-------
 490			parsed bool value
 491
 492			Raises
 493			------
 494			`ValueError` if `value` cannot be parsed
 495
 496			Examples
 497			--------
 498			```python
 499			parser = TypeParser()
 500			parser.parse_bool("true")   # True
 501			parser.parse_bool("FALSE")  # False
 502			```
 503		"""
 504		if self.trim:
 505			value = value.strip()
 506
 507		if self.bool_case_sensitive:
 508			special_value = value
 509		else:
 510			special_value = value.lower()
 511		if special_value in self.true_values:
 512			return True
 513		if special_value in self.false_values:
 514			return False
 515
 516		raise ValueError(f"not a boolean: {value}")
 517
 518
 519	def parse_int(self, value: str, *, allow_scientific: bool=True) -> int:
 520		"""
 521			Parse a string and return it as an int if possible
 522
 523			If the string represents a bool, it will be converted to `1` for True and `0` for False.
 524
 525			Parameters
 526			----------
 527			`value`
 528			: string to be parsed
 529
 530			`allow_scientific`
 531			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
 532
 533			Returns
 534			-------
 535			parsed int value
 536
 537			Raises
 538			------
 539			`ValueError` if `value` cannot be parsed
 540
 541			Examples
 542			--------
 543			```python
 544			parser = TypeParser()
 545			parser.parse_int("0")    # 0
 546			parser.parse_int("-1")   # -1
 547			parser.parse_int("2e3")  # 2000
 548			```
 549		"""
 550		if self.trim:
 551			value = value.strip()
 552
 553		if self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=allow_scientific):
 554			if allow_scientific:
 555				value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
 556				if exp is not None:
 557					if value[0] in (self._negative_chars - {self._negative_char}):
 558						value = self._negative_char + value[1:]
 559					return int(value) * (10 ** int(exp))
 560
 561			if value[0] in (self._negative_chars - {self._negative_char}):
 562				value = self._negative_char + value[1:]
 563			return int(value)
 564
 565		elif self.is_bool(value):
 566			return int(self.parse_bool(value))
 567		else:
 568			raise ValueError(f"not an integer: {value}")
 569
 570
 571	def _parse_floatlike(self,
 572		value: str,
 573		converter: Callable[[Union[str, bool]], _FloatLike],
 574		inf_value: _FloatLike,
 575		nan_value: _FloatLike,
 576		*,
 577		allow_scientific: bool=True,
 578		allow_inf: bool=True,
 579		allow_nan: bool=True
 580	) -> _FloatLike:
 581		if self.trim:
 582			value = value.strip()
 583		if self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan):
 584			if self.float_case_sensitive:
 585				special_value = value
 586			else:
 587				special_value = value.lower()
 588			if allow_inf and special_value in self.inf_values:
 589				return inf_value
 590			if allow_nan and special_value in self.nan_values:
 591				return nan_value
 592
 593			if len(value) > 0 and value[0] in self._sign_chars:
 594				positive_part = value[1:]
 595				if self.float_case_sensitive:
 596					special_value = positive_part
 597				else:
 598					special_value = positive_part.lower()
 599				if allow_inf and special_value in self.inf_values:
 600					if value[0] in self._negative_chars:
 601						return -1 * inf_value
 602					else:
 603						return inf_value
 604				if allow_nan and special_value in self.nan_values:
 605					return nan_value
 606
 607				if value[0] in self._negative_chars:
 608					value = self._negative_char + positive_part
 609			return converter(value)
 610		elif self.is_bool(value):
 611			return converter(self.parse_bool(value))
 612		else:
 613			raise ValueError(f"not a {_FloatLike.__name__}: {value}")
 614
 615
 616	def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
 617		"""
 618			Parse a string and return it as a (non-exact) float if possible
 619
 620			If the string represents a bool, it will be converted to `1.` for True and `0.` for False. If the string represents an int, it will be converted to a float also.
 621
 622			Behaves analogously to `parse_decimal()`, except that that returns an exact Decimal instead.
 623
 624			Parameters
 625			----------
 626			`value`
 627			: string to be parsed
 628
 629			`allow_scientific`
 630			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
 631
 632			`allow_inf`
 633			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
 634
 635			`allow_nan`
 636			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
 637
 638			Returns
 639			-------
 640			parsed float value
 641
 642			Raises
 643			------
 644			`ValueError` if `value` cannot be parsed
 645
 646			Examples
 647			--------
 648			```python
 649			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
 650			parser.parse_float("1.")       # 1.
 651			parser.parse_float("1.23e2")   # 123.
 652			parser.parse_float("1.23e-2")  # 0.0123
 653			parser.parse_float("inf")      # math.inf
 654			```
 655		"""
 656		return self._parse_floatlike(value, float, math.inf, math.nan,
 657			allow_scientific=allow_scientific,
 658			allow_inf=allow_inf,
 659			allow_nan=allow_nan,
 660		)
 661
 662
 663	def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
 664		"""
 665			Parse a string and return it as an exact Decimal if possible
 666
 667			If the string represents a bool, it will be converted to `Decimal(1)` for True and `Decimal(0)` for False. If the string represents an int, it will be converted to a Decimal also.
 668
 669			Behaves analogously to `parse_float()`, except that that returns a non-exact float instead.
 670
 671			Parameters
 672			----------
 673			`value`
 674			: string to be parsed
 675
 676			`allow_scientific`
 677			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
 678
 679			`allow_inf`
 680			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
 681
 682			`allow_nan`
 683			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
 684
 685			Returns
 686			-------
 687			parsed Decimal value
 688
 689			Raises
 690			------
 691			`ValueError` if `value` cannot be parsed
 692
 693			Examples
 694			--------
 695			```python
 696			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
 697			parser.parse_decimal("1.")       # Decimal(1)
 698			parser.parse_decimal("1.23e2")   # Decimal(123)
 699			parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
 700			parser.parse_decimal("inf")      # Decimal(math.inf)
 701			```
 702		"""
 703		return self._parse_floatlike(value, Decimal, Decimal(math.inf), Decimal(math.nan),
 704			allow_scientific=allow_scientific,
 705			allow_inf=allow_inf,
 706			allow_nan=allow_nan,
 707		)
 708
 709
 710	def infer(self, value: str) -> AnyValueType:
 711		"""
 712			Infer the underlying type of a string
 713
 714			Also check for inline lists if `self.list_delimiter` is not None.
 715
 716			Parameters
 717			----------
 718			`value`
 719			: the string for which the type should be inferred
 720
 721			Returns
 722			-------
 723			inferred type
 724
 725			Examples
 726			--------
 727			```python
 728			parser = TypeParser()
 729			parser.infer("true")  # bool
 730			parser.infer("2.0")   # float
 731			parser.infer("abc")   # str
 732			```
 733		"""
 734		if self.is_none(value):
 735			return NoneType
 736		if self.is_bool(value):
 737			return bool
 738		if self.is_int(value):
 739			return int
 740		if self.is_float(value):
 741			if self.use_decimal:
 742				return Decimal
 743			else:
 744				return float
 745
 746		if self.trim:
 747			value = value.strip()
 748
 749		if self.list_delimiter is not None and self.list_delimiter in value:
 750			subvalues = value.split(self.list_delimiter)
 751			if self.trim:
 752				subvalues = [subvalue.strip() for subvalue in subvalues]
 753			reduced_type = reduce_types(self.infer(subvalue) for subvalue in subvalues)
 754			r = list[reduced_type]
 755			return r
 756
 757		return GenericValue
 758
 759
	def infer_series(self, values: Iterable[str]) -> AnyValueType:
		"""
			Infer the underlying common type of a series of strings

			Each string is inferred individually with `infer()`, and the individual types are then combined with `parsetypes.reduce_types()`. If the values do not all have the same apparent type, the result is the narrowest possible type that encompasses all values in the series.

			Parameters
			----------
			`values`
			: series of strings for which the type should be inferred

			Returns
			-------
			inferred type

			Examples
			--------
			```python
			parser = TypeParser()
			parser.infer_series(["1", "2", "3.4"])       # float
			parser.infer_series(["true", "false", "2"])  # int
			parser.infer_series(["1", "2.3", "abc"])     # str
			```
		"""
		# Kept lazy so the individual types are produced only as reduce_types() consumes them
		individual_types = (self.infer(item) for item in values)
		return reduce_types(individual_types)
 785
 786
 787	def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
 788		"""
 789			Infer the underlying common type for each column of a table of strings
 790
 791			For each column, if the values do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
 792
 793			Note that the individual inferred types of every value in the table must be able to fit into memory.
 794
 795			Parameters
 796			----------
 797			`rows`
 798			: table of strings for which the types should be inferred, in row-major order
 799
 800			Returns
 801			-------
 802			inferred types
 803
 804			Examples
 805			--------
 806			```python
 807			parser = TypeParser()
 808			parser.infer_table([
 809				["1",   "true",  "1"],
 810				["2",   "false", "2.3"],
 811				["3.4", "2",     "abc"],
 812			])
 813			# [float, int, str]
 814			```
 815		"""
 816		rows_iter = iter(rows)
 817		first_row = next(rows_iter, None)
 818		if first_row is None:
 819			return []
 820
 821		num_cols = len(first_row)
 822		if num_cols == 0:
 823			return []
 824
 825		table = _TypeTable([[self.infer(value)] for value in first_row])
 826		for row in rows_iter:
 827			table.add_row([self.infer(value) for value in row])
 828
 829		return [reduce_types(col) for col in table.cols]
 830
 831
	def convert(self, value: str, target_type: AnyValueType) -> AnyValue:
		"""
			Convert a string to the specified target type if possible

			Valid values for `target_type` include any return value from `infer()`, `infer_series()` and `infer_table()`. To infer and convert the string automatically, use `parse()`, `parse_series()` or `parse_table()` instead.

			Parameters
			----------
			`value`
			: the string to be converted

			`target_type`
			: type to which the value should be converted

			Returns
			-------
			converted value

			Raises
			------
			`ValueError`
			: if `value` cannot be converted to `target_type`

			`TypeError`
			: if `target_type` is not a valid type

			Examples
			--------
			```python
			parser = TypeParser()
			parser.convert("true", bool)  # True
			parser.convert("2", int)      # 2
			parser.convert("2", float)    # 2.
			```
		"""
		base, type_args = _decompose_type(target_type)

		# Scalar targets map directly onto the corresponding parse_*() method
		if base == NoneType:
			return self.parse_none(value)
		if base == bool:
			return self.parse_bool(value)
		if base == int:
			return self.parse_int(value)
		if base == Decimal:
			return self.parse_decimal(value)
		if base == float:
			return self.parse_float(value)
		if base == str:
			return value

		# For container-like targets, a single non-str type argument means the contents need converting too
		has_inner_type = type_args is not None and len(type_args) == 1 and type_args[0] != str

		if base == Nullable:
			if self.is_none(value):
				return None
			if has_inner_type:
				return self.convert(value, type_args[0])
			return value

		if base == list:
			items = value.split(self.list_delimiter)
			if self.trim:
				items = [item.strip() for item in items]
			if has_inner_type:
				item_type = type_args[0]
				return [self.convert(item, item_type) for item in items]
			return items

		raise TypeError(f"cannot convert to type: {target_type}")
 900
 901
 902	def parse(self, value: str) -> AnyValue:
 903		"""
 904			Parse a string and convert it to its underlying type
 905
 906			Parameters
 907			----------
 908			`value`
 909			: the string to be parsed
 910
 911			Returns
 912			-------
 913			converted value
 914
 915			Examples
 916			--------
 917			```python
 918			parser = TypeParser()
 919			parser.parse("true")  # True
 920			parser.parse("2.0")   # 2.
 921			parser.parse("abc")   # "abc"
 922			```
 923		"""
 924		return self.convert(value, self.infer(value))
 925
 926
 927	def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
 928		"""
 929			Parse a series of strings and convert them to their underlying common type
 930
 931			If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
 932
 933			Parameters
 934			----------
 935			`values`
 936			: series of strings to be parsed
 937
 938			Returns
 939			-------
 940			converted values
 941
 942			Examples
 943			--------
 944			```python
 945			parser = TypeParser()
 946			parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
 947			parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
 948			parser.parse_series(["true", "false", ""])  # [True, False, None]
 949			parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
 950			```
 951		"""
 952		inferred = self.infer_series(values)
 953		return [self.convert(value, inferred) for value in values]
 954
 955
 956	def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
 957		"""
 958			Parse a table of strings and convert them to the underlying common type of each column
 959
 960			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
 961
 962			Note that the type to which the values should be converted is determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.
 963
 964			This is a function that computes the entire table and returns it all at once. The generator `iterate_table()` behaves analogously, except that it computes and yields each row one at a time.
 965
 966			Parameters
 967			----------
 968			`rows`
 969			: table of strings to be parsed, in row-major order
 970
 971			`iterator`
 972			: whether the parsed values should be yielded as an iterator. If False, which is the default, the entire table is computed and returned as a list of lists. If True, this function behaves as a generator, and the rows of the table are computed and yielded one at a time. However, note that even when set to True, the type inference requires that inferred type of each individual value must all be able to fit into memory at once.
 973
 974			Returns
 975			-------
 976			converted table of values, in row-major order
 977
 978			Examples
 979			--------
 980			```python
 981			parser = TypeParser()
 982			table = parser.parse_table([
 983				["1", "5",   "true",  "1"],
 984				["2", "6.7", "false", "2.3"],
 985				["3", "8.0", "",      "abc"],
 986			]):
 987			assert table == [
 988				[1, 5.,  True,  "1"],
 989				[2, 6.7, False, "2.3"],
 990				[3, 8.,  None,  "abc"],
 991			]
 992			```
 993		"""
 994		return [converted_row for converted_row in self.iterate_table(rows)]
 995
 996
 997	def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
 998		"""
 999			Parse a table of strings for the underlying common type of each column, then convert and yield each row
1000
1001			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1002
1003			This is a generator that computes and yields each row one at a time. The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type to which the value sshould be converted is still determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.
1004
1005			Parameters
1006			----------
1007			`rows`
1008			: table of strings to be parsed, in row-major order
1009
1010			Yields
1011			-------
1012			each row of converted table values
1013
1014			Examples
1015			--------
1016			```python
1017			parser = TypeParser()
1018			table = parser.iterate_table([
1019				["1",   "true",  "1"],
1020				["2",   "false", "2.3"],
1021				["3.4", "2",     "abc"],
1022			]):
1023			assert next(table) == [1.,  1, "1"]
1024			assert next(table) == [2.,  0, "2.3"]
1025			assert next(table) == [3.4, 2, "abc"]
1026			```
1027		"""
1028		inferred_types = self.infer_table(rows)
1029
1030		for row in rows:
1031			yield [self.convert(value, inferred) for value, inferred in zip(row, inferred_types)]

A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.

Instances of this class can be configured with different settings for the parser and inferrer. See the constructor for more details about the available options.

TypeParser( *, trim: bool = True, use_decimal: bool = False, list_delimiter: Optional[str] = None, none_values: Iterable[str] = [''], none_case_sensitive: bool = False, true_values: Iterable[str] = ['true'], false_values: Iterable[str] = ['false'], bool_case_sensitive: bool = False, int_case_sensitive: bool = False, inf_values: Iterable[str] = [], nan_values: Iterable[str] = [], float_case_sensitive: bool = False, case_sensitive: Optional[bool] = None)
	def __init__(self,
		*,
		trim: bool=True,
		use_decimal: bool=False,
		list_delimiter: Optional[str]=None,
		none_values: Iterable[str]=[""],
		none_case_sensitive: bool=False,
		true_values: Iterable[str]=["true"],
		false_values: Iterable[str]=["false"],
		bool_case_sensitive: bool=False,
		int_case_sensitive: bool=False,
		inf_values: Iterable[str]=[],
		nan_values: Iterable[str]=[],
		float_case_sensitive: bool=False,
		case_sensitive: Optional[bool]=None,
	):
		"""
			Initialise a new parser

			Parameters
			----------
			`trim`
			: whether leading and trailing whitespace should be stripped from strings

			`use_decimal`
			: whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer type (`infer()` and `infer_*()`), and does not affect methods where the type is explicitly specified (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`).

			`list_delimiter`
			: the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead.

			`none_values`
			: list of strings that represent the value None

			`none_case_sensitive`
			: whether matches against `none_values` should be made in a case-sensitive manner

			`true_values`
			: list of strings that represent the bool value True

			`false_values`
			: list of strings that represent the bool value False

			`bool_case_sensitive`
			: whether matches against `true_values` and `false_values` should be made in a case-sensitive manner

			`int_case_sensitive`
			: whether checks for int should be done in a case-sensitive manner. This usually only applies to values given in scientific notation, where the mantissa and exponent usually are separated by `e`.

			`inf_values`
			: list of strings that represent the float or Decimal value of infinity. Each of the strings can be prepended with a negative sign to represent negative infinity also.

			`nan_values`
			: list of strings that represent a float or Decimal that is NaN (not a number)

			`float_case_sensitive`
			: whether checks for float should be done in a case-sensitive manner. This applies to matches against `inf_values` and `nan_values`, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.

			`case_sensitive`
			: whether all matches should be made in a case-sensitive manner. Sets all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive`, `float_case_sensitive` to the same value, ignoring any individual settings.

			Raises
			------
			`ValueError` if any of the options would lead to ambiguities during parsing
		"""

		# A global setting overrides every individual case-sensitivity option
		if case_sensitive is not None:
			none_case_sensitive = case_sensitive
			int_case_sensitive = case_sensitive
			bool_case_sensitive = case_sensitive
			float_case_sensitive = case_sensitive

		# Special values are normalised (trimmed, lower-cased) the same way as the strings they will later be compared against
		self.trim = trim
		if self.trim:
			none_values = (value.strip() for value in none_values)
			true_values = (value.strip() for value in true_values)
			false_values = (value.strip() for value in false_values)
			inf_values = (value.strip() for value in inf_values)
			nan_values = (value.strip() for value in nan_values)

		self.use_decimal = use_decimal
		self.list_delimiter = list_delimiter

		self.none_case_sensitive = none_case_sensitive
		if not self.none_case_sensitive:
			none_values = (value.lower() for value in none_values)
		self.none_values = set(none_values)

		self.bool_case_sensitive = bool_case_sensitive
		if not self.bool_case_sensitive:
			true_values = (value.lower() for value in true_values)
			false_values = (value.lower() for value in false_values)
		self.true_values = set(true_values)
		self.false_values = set(false_values)

		self.int_case_sensitive = int_case_sensitive

		self.float_case_sensitive = float_case_sensitive
		if not self.float_case_sensitive:
			inf_values = (value.lower() for value in inf_values)
			nan_values = (value.lower() for value in nan_values)
		self.inf_values = set(inf_values)
		self.nan_values = set(nan_values)

		# Unconfigurable default values
		self._negative_char = "-"
		self._negative_chars = {self._negative_char, "−"}
		self._sign_chars = self._negative_chars | {"+"}
		self._digit_chars = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}  # Because isdigit("²") == True, but int("²") is invalid
		self._digit_separators = {"_"}
		self._scientific_char = "e"
		self._float_separator = "."
		self._reserved_chars = self._sign_chars | self._digit_chars | self._digit_separators | {self._scientific_char} | {self._float_separator}

		# Check if any special values conflict: each one must be interpreted as its own category and as nothing else
		for name, special_values in [
			(_SpecialValue.LIST, [self.list_delimiter] if self.list_delimiter is not None else []),
			(_SpecialValue.NONE, self.none_values),
			(_SpecialValue.TRUE, self.true_values),
			(_SpecialValue.FALSE, self.false_values),
			(_SpecialValue.INF, self.inf_values),
			(_SpecialValue.NAN, self.nan_values),
		]:
			for special_value in special_values:
				if special_value in self._reserved_chars:
					raise ValueError(f"cannot use reserved char as {name.value}: {special_value}")

				if name != _SpecialValue.NONE and self.is_none(special_value):
					raise ValueError(f"cannot use None value as {name.value}: {special_value}")

				if (
					(name == _SpecialValue.TRUE and self.parse_bool(special_value) != True) or
					(name == _SpecialValue.FALSE and self.parse_bool(special_value) != False) or
					(name != _SpecialValue.TRUE and name != _SpecialValue.FALSE and self.is_bool(special_value))
				):
					raise ValueError(f"cannot use bool value as {name.value}: {special_value}")

				if self.is_int(special_value):
					raise ValueError(f"cannot use int value as {name.value}: {special_value}")

				if self.use_decimal:
					if (
						(name == _SpecialValue.INF and self.parse_decimal(special_value) != Decimal(math.inf)) or
						(name == _SpecialValue.NAN and not self.parse_decimal(special_value).is_nan()) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use Decimal value as {name.value}: {special_value}")
				else:
					if (
						(name == _SpecialValue.INF and self.parse_float(special_value) != math.inf) or
						# NaN never compares equal and an identity test only works for the math.nan object itself, so test with isnan()
						(name == _SpecialValue.NAN and not math.isnan(self.parse_float(special_value))) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use float value as {name.value}: {special_value}")

Initialise a new parser

Parameters

trim : whether leading and trailing whitespace should be stripped from strings

use_decimal : whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer type (infer() and infer_*()), and does not affect methods where the type is explicitly specified (is_float(), is_decimal(), parse_float(), parse_decimal()).

list_delimiter : the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead.

none_values : list of strings that represent the value None

none_case_sensitive : whether matches against none_values should be made in a case-sensitive manner

true_values : list of strings that represent the bool value True

false_values : list of strings that represent the bool value False

bool_case_sensitive : whether matches against true_values and false_values should be made in a case-sensitive manner

int_case_sensitive : whether checks for int should be done in a case-sensitive manner. This usually only applies to values given in scientific notation, where the mantissa and exponent usually are separated by e.

inf_values : list of strings that represent the float or Decimal value of infinity. Each of the strings can be prepended with a negative sign to represent negative infinity also.

nan_values : list of strings that represent a float or Decimal that is NaN (not a number)

float_case_sensitive : whether checks for float should be done in a case-sensitive manner. This applies to matches against inf_values and nan_values, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by e.

case_sensitive : whether all matches should be made in a case-sensitive manner. Sets all of none_case_sensitive, bool_case_sensitive, int_case_sensitive, float_case_sensitive to the same value, ignoring any individual settings.

Raises

ValueError if any of the options would lead to ambiguities during parsing

def is_none(self, value: str) -> bool:
223	def is_none(self, value: str) -> bool:
224		"""
225			Check if a string represents the value None
226
227			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
228
229			Parameters
230			----------
231			`value`
232			: string to be checked
233
234			Returns
235			-------
236			whether it is None
237
238			Examples
239			--------
240			```python
241			parser = TypeParser()
242			parser.parse_bool("")     # True
243			parser.parse_bool("abc")  # False
244			```
245		"""
246		if self.trim:
247			value = value.strip()
248		if not self.bool_case_sensitive:
249			value = value.lower()
250
251		if value in self.none_values:
252			return True
253		else:
254			return False

Check if a string represents the value None

Only strings that match the values in self.none_values will be interpreted as None. The default accepted values are [""], i.e. an empty string. The case sensitivity of this matching depends on self.none_case_sensitive, which is False by default.

Parameters

value : string to be checked

Returns

whether it is None

Examples

parser = TypeParser()
parser.is_none("")     # True
parser.is_none("abc")  # False
def is_bool(self, value: str) -> bool:
257	def is_bool(self, value: str) -> bool:
258		"""
259			Check if a string represents a bool
260
261			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
262
263			Parameters
264			----------
265			`value`
266			: string to be checked
267
268			Returns
269			-------
270			whether it is a bool
271
272			Examples
273			--------
274			```python
275			parser = TypeParser()
276			parser.is_bool("true")  # True
277			parser.is_bool("")      # True
278			parser.is_bool("abc")   # False
279			```
280		"""
281		if self.trim:
282			value = value.strip()
283
284		if not self.bool_case_sensitive:
285			value = value.lower()
286		if value in self.true_values:
287			return True
288		if value in self.false_values:
289			return True
290
291		return False

Check if a string represents a bool

Only strings that match the values in self.true_values and self.false_values will be interpreted as booleans. The default accepted values are ["true"] and ["false"] respectively. The case sensitivity of this matching depends on self.bool_case_sensitive, which is False by default.

Parameters

value : string to be checked

Returns

whether it is a bool

Examples

parser = TypeParser()
parser.is_bool("true")  # True
parser.is_bool("")      # False
parser.is_bool("abc")   # False
def is_int( self, value: str, *, allow_negative: bool = True, allow_sign: bool = True, allow_scientific: bool = True) -> bool:
294	def is_int(self, value: str, *, allow_negative: bool=True, allow_sign: bool=True, allow_scientific: bool=True) -> bool:
295		"""
296			Check if a string represents an int
297
298			Parameters
299			----------
300			`value`
301			: string to be checked
302
303			`allow_negative`
304			: whether to accept negative values
305
306			`allow_sign`
307			: whether to accept values prepended with a sign character. If False, it implies that `allow_negative` is False also.
308
309			`allow_scientific`
310			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
311
312			Returns
313			-------
314			whether it is an int
315
316			Examples
317			--------
318			```python
319			parser = TypeParser()
320			parser.is_int("0")    # True
321			parser.is_int("-1")   # True
322			parser.is_int("abc")  # False
323			parser.is_int("")     # False
324			```
325		"""
326		if self.trim:
327			value = value.strip()
328
329		if len(value) == 0:
330			return False
331
332		if allow_scientific:
333			value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
334			if exp is not None:
335				return self.is_int(
336					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
337				) and self.is_int(
338					exp, allow_sign=True, allow_negative=False, allow_scientific=False
339				)
340
341		if value[0] in self._sign_chars:
342			if len(value) == 1:
343				return False
344			if not allow_sign:
345				return False
346			if not allow_negative and value[0] in self._negative_chars:
347				return False
348			value = value[1:]
349		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
350			return False
351
352		prev_separated = False
353		for c in value:
354			if c in self._digit_separators:
355				if prev_separated:
356					return False
357				prev_separated = True
358			else:
359				prev_separated = False
360				if c not in self._digit_chars:
361					return False
362		return True

Check if a string represents an int

Parameters

value : string to be checked

allow_negative : whether to accept negative values

allow_sign : whether to accept values prepended with a sign character. If False, it implies that allow_negative is False also.

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that M must be an integer and X must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.

Returns

whether it is an int

Examples

parser = TypeParser()
parser.is_int("0")    # True
parser.is_int("-1")   # True
parser.is_int("abc")  # False
parser.is_int("")     # False
def is_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool:
365	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
366		"""
367			Check if a string represents a float (or equivalently, a Decimal)
368
369			This function will also return True if the string represents an int.
370
371			Alias: `is_decimal()`
372
373			Parameters
374			----------
375			`value`
376			: string to be checked
377
378			`allow_scientific`
379			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
380
381			`allow_inf`
382			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
383
384			`allow_nan`
385			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
386
387			Returns
388			-------
389			whether it is a float or Decimal
390
391			Examples
392			--------
393			```python
394			parser = TypeParser()
395			parser.is_float("1.")       # True
396			parser.is_float("12.3e-2")  # True
397			parser.is_float("abc")      # False
398			parser.is_float("")         # False
399			```
400		"""
401		if self.trim:
402			value = value.strip()
403
404		if len(value) > 0 and value[0] in self._sign_chars:
405			value = value[1:]
406
407		if self.float_case_sensitive:
408			special_value = value
409		else:
410			special_value = value.lower()
411		if allow_inf and special_value in self.inf_values:
412			return True
413		if allow_nan and special_value in self.nan_values:
414			return True
415
416		if len(value) == 0:
417			return False
418
419		if allow_scientific:
420			value, exp = _decompose_string_pair(value, self._scientific_char, self.float_case_sensitive)
421			if exp is not None:
422				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)
423
424		value, frac = _decompose_string_pair(value, self._float_separator, self.float_case_sensitive)
425		if frac is not None:
426			if value == "" and frac == "":
427				return False
428			return (
429				self.is_int(value, allow_sign=True, allow_negative=False, allow_scientific=False) or value == ""
430			) and (
431				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
432			)
433
434		return self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=False)

Check if a string represents a float (or equivalently, a Decimal)

This function will also return True if the string represents an int.

Alias: is_decimal()

Parameters

value : string to be checked

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

whether it is a float or Decimal

Examples

parser = TypeParser()
parser.is_float("1.")       # True
parser.is_float("12.3e-2")  # True
parser.is_float("abc")      # False
parser.is_float("")         # False
def is_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool:
437	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
438		"""
439			Alias of `is_float()`
440		"""
441		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)

Alias of is_float()

def parse_none(self, value: str) -> None:
444	def parse_none(self, value: str) -> None:
445		"""
446			Parse a string and return it as the value None if possible
447
448			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
449
450			Parameters
451			----------
452			`value`
453			: string to be parsed
454
455			Returns
456			-------
457			parsed None value
458
459			Raises
460			------
461			`ValueError` if `value` cannot be parsed
462
463			Examples
464			--------
465			```python
466			parser = TypeParser()
467			parser.parse_bool("")     # None
468			parser.parse_bool("abc")  # raises ValueError
469			```
470		"""
471		if self.is_none(value):
472			return None
473		else:
474			raise ValueError(f"not a none value: {value}")

Parse a string and return it as the value None if possible

Only strings that match the values in self.none_values will be interpreted as None. The default accepted values are [""], i.e. an empty string. The case sensitivity of this matching depends on self.none_case_sensitive, which is False by default.

Parameters

value : string to be parsed

Returns

parsed None value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_none("")     # None
parser.parse_none("abc")  # raises ValueError
def parse_bool(self, value: str) -> bool:
477	def parse_bool(self, value: str) -> bool:
478		"""
479			Parse a string and return it as a bool if possible
480
481			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
482
483			Parameters
484			----------
485			`value`
486			: string to be parsed
487
488			Returns
489			-------
490			parsed bool value
491
492			Raises
493			------
494			`ValueError` if `value` cannot be parsed
495
496			Examples
497			--------
498			```python
499			parser = TypeParser()
500			parser.parse_bool("true")   # True
501			parser.parse_bool("FALSE")  # False
502			```
503		"""
504		if self.trim:
505			value = value.strip()
506
507		if self.bool_case_sensitive:
508			special_value = value
509		else:
510			special_value = value.lower()
511		if special_value in self.true_values:
512			return True
513		if special_value in self.false_values:
514			return False
515
516		raise ValueError(f"not a boolean: {value}")

Parse a string and return it as a bool if possible

Only strings that match the values in self.true_values and self.false_values will be interpreted as booleans. The default accepted values are ["true"] and ["false"] respectively. The case sensitivity of this matching depends on self.bool_case_sensitive, which is False by default.

Parameters

value : string to be parsed

Returns

parsed bool value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_bool("true")   # True
parser.parse_bool("FALSE")  # False
def parse_int(self, value: str, *, allow_scientific: bool = True) -> int:
519	def parse_int(self, value: str, *, allow_scientific: bool=True) -> int:
520		"""
521			Parse a string and return it as an int if possible
522
523			If the string represents a bool, it will be converted to `1` for True and `0` for False.
524
525			Parameters
526			----------
527			`value`
528			: string to be parsed
529
530			`allow_scientific`
531			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
532
533			Returns
534			-------
535			parsed int value
536
537			Raises
538			------
539			`ValueError` if `value` cannot be parsed
540
541			Examples
542			--------
543			```python
544			parser = TypeParser()
545			parser.parse_int("0")    # 0
546			parser.parse_int("-1")   # -1
547			parser.parse_int("2e3")  # 2000
548			```
549		"""
550		if self.trim:
551			value = value.strip()
552
553		if self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=allow_scientific):
554			if allow_scientific:
555				value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
556				if exp is not None:
557					if value[0] in (self._negative_chars - {self._negative_char}):
558						value = self._negative_char + value[1:]
559					return int(value) * (10 ** int(exp))
560
561			if value[0] in (self._negative_chars - {self._negative_char}):
562				value = self._negative_char + value[1:]
563			return int(value)
564
565		elif self.is_bool(value):
566			return int(self.parse_bool(value))
567		else:
568			raise ValueError(f"not an integer: {value}")

Parse a string and return it as an int if possible

If the string represents a bool, it will be converted to 1 for True and 0 for False.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that M must be an integer and X must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.

Returns

parsed int value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_int("0")    # 0
parser.parse_int("-1")   # -1
parser.parse_int("2e3")  # 2000
def parse_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> float:
	def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
		"""
			Parse a string and return it as a (non-exact) float if possible

			If the string represents a bool, it will be converted to `1.` for True and `0.` for False. If the string represents an int, it will be converted to a float also.

			Behaves analogously to `parse_decimal()`, except that `parse_decimal()` returns an exact Decimal instead.

			Parameters
			----------
			`value`
			: string to be parsed

			`allow_scientific`
			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.

			`allow_inf`
			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.

			`allow_nan`
			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.

			Returns
			-------
			parsed float value

			Raises
			------
			`ValueError` if `value` cannot be parsed

			Examples
			--------
			```python
			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
			parser.parse_float("1.")       # 1.
			parser.parse_float("1.23e2")   # 123.
			parser.parse_float("1.23e-2")  # 0.0123
			parser.parse_float("inf")      # math.inf
			```
		"""
		# All of the work is delegated to the shared float-like parser, which is
		# given the float constructor and the float infinity and NaN values
		return self._parse_floatlike(value, float, math.inf, math.nan,
			allow_scientific=allow_scientific,
			allow_inf=allow_inf,
			allow_nan=allow_nan,
		)

Parse a string and return it as a (non-exact) float if possible

If the string represents a bool, it will be converted to 1. for True and 0. for False. If the string represents an int, it will be converted to a float also.

Behaves analogously to parse_decimal(), except that parse_decimal() returns an exact Decimal instead.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

parsed float value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_float("1.")       # 1.
parser.parse_float("1.23e2")   # 123.
parser.parse_float("1.23e-2")  # 0.0123
parser.parse_float("inf")      # math.inf
def parse_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> decimal.Decimal:
	def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
		"""
			Parse a string and return it as an exact Decimal if possible

			If the string represents a bool, it will be converted to `Decimal(1)` for True and `Decimal(0)` for False. If the string represents an int, it will be converted to a Decimal also.

			Behaves analogously to `parse_float()`, except that `parse_float()` returns a non-exact float instead.

			Parameters
			----------
			`value`
			: string to be parsed

			`allow_scientific`
			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.

			`allow_inf`
			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.

			`allow_nan`
			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.

			Returns
			-------
			parsed Decimal value

			Raises
			------
			`ValueError` if `value` cannot be parsed

			Examples
			--------
			```python
			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
			parser.parse_decimal("1.")       # Decimal(1)
			parser.parse_decimal("1.23e2")   # Decimal(123)
			parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
			parser.parse_decimal("inf")      # Decimal(math.inf)
			```
		"""
		# All of the work is delegated to the shared float-like parser, which is
		# given the Decimal constructor and the Decimal infinity and NaN values
		return self._parse_floatlike(value, Decimal, Decimal(math.inf), Decimal(math.nan),
			allow_scientific=allow_scientific,
			allow_inf=allow_inf,
			allow_nan=allow_nan,
		)

Parse a string and return it as an exact Decimal if possible

If the string represents a bool, it will be converted to Decimal(1) for True and Decimal(0) for False. If the string represents an int, it will be converted to a Decimal also.

Behaves analogously to parse_float(), except that parse_float() returns a non-exact float instead.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

parsed Decimal value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_decimal("1.")       # Decimal(1)
parser.parse_decimal("1.23e2")   # Decimal(123)
parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
parser.parse_decimal("inf")      # Decimal(math.inf)
def infer( self, value: str) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]:
710	def infer(self, value: str) -> AnyValueType:
711		"""
712			Infer the underlying type of a string
713
714			Also check for inline lists if `self.list_delimiter` is not None.
715
716			Parameters
717			----------
718			`value`
719			: the string for which the type should be inferred
720
721			Returns
722			-------
723			inferred type
724
725			Examples
726			--------
727			```python
728			parser = TypeParser()
729			parser.infer("true")  # bool
730			parser.infer("2.0")   # float
731			parser.infer("abc")   # str
732			```
733		"""
734		if self.is_none(value):
735			return NoneType
736		if self.is_bool(value):
737			return bool
738		if self.is_int(value):
739			return int
740		if self.is_float(value):
741			if self.use_decimal:
742				return Decimal
743			else:
744				return float
745
746		if self.trim:
747			value = value.strip()
748
749		if self.list_delimiter is not None and self.list_delimiter in value:
750			subvalues = value.split(self.list_delimiter)
751			if self.trim:
752				subvalues = [subvalue.strip() for subvalue in subvalues]
753			reduced_type = reduce_types(self.infer(subvalue) for subvalue in subvalues)
754			r = list[reduced_type]
755			return r
756
757		return GenericValue

Infer the underlying type of a string

Also check for inline lists if self.list_delimiter is not None.

Parameters

value : the string for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer("true")  # bool
parser.infer("2.0")   # float
parser.infer("abc")   # str
def infer_series( self, values: Iterable[str]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]:
760	def infer_series(self, values: Iterable[str]) -> AnyValueType:
761		"""
762			Infer the underlying common type of a series of strings
763
764			If the values in the series do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
765
766			Parameters
767			----------
768			`values`
769			: series of strings for which the type should be inferred
770
771			Returns
772			-------
773			inferred type
774
775			Examples
776			--------
777			```python
778			parser = TypeParser()
779			parser.infer_series(["1", "2", "3.4"])       # float
780			parser.infer_series(["true", "false", "2"])  # int
781			parser.infer_series(["1", "2.3", "abc"])     # str
782			```
783		"""
784		return reduce_types(self.infer(value) for value in values)

Infer the underlying common type of a series of strings

If the values in the series do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Parameters

values : series of strings for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer_series(["1", "2", "3.4"])       # float
parser.infer_series(["true", "false", "2"])  # int
parser.infer_series(["1", "2.3", "abc"])     # str
def infer_table( self, rows: Iterable[Sequence[str]]) -> list[typing.Type[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]]:
787	def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
788		"""
789			Infer the underlying common type for each column of a table of strings
790
791			For each column, if the values do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
792
793			Note that the individual inferred types of every value in the table must be able to fit into memory.
794
795			Parameters
796			----------
797			`rows`
798			: table of strings for which the types should be inferred, in row-major order
799
800			Returns
801			-------
802			inferred types
803
804			Examples
805			--------
806			```python
807			parser = TypeParser()
808			parser.infer_table([
809				["1",   "true",  "1"],
810				["2",   "false", "2.3"],
811				["3.4", "2",     "abc"],
812			])
813			# [float, int, str]
814			```
815		"""
816		rows_iter = iter(rows)
817		first_row = next(rows_iter, None)
818		if first_row is None:
819			return []
820
821		num_cols = len(first_row)
822		if num_cols == 0:
823			return []
824
825		table = _TypeTable([[self.infer(value)] for value in first_row])
826		for row in rows_iter:
827			table.add_row([self.infer(value) for value in row])
828
829		return [reduce_types(col) for col in table.cols]

Infer the underlying common type for each column of a table of strings

For each column, if the values do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the individual inferred types of every value in the table must be able to fit into memory.

Parameters

rows : table of strings for which the types should be inferred, in row-major order

Returns

inferred types

Examples

parser = TypeParser()
parser.infer_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
# [float, int, str]
def convert( self, value: str, target_type: Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]) -> Union[str, int, float, decimal.Decimal, bool, NoneType, list]:
832	def convert(self, value: str, target_type: AnyValueType) -> AnyValue:
833		"""
834			Convert a string to the specified target type if possible
835
836			Valid values for `target_type` include any return value from `infer()`, `infer_series()` and `infer_table()`. To infer and convert the string automatically, use `parse()`, `parse_series()` or `parse_table()` instead.
837
838			Parameters
839			----------
840			`value`
841			: the string to be converted
842
843			`target_type`
844			: type to which the value should be converted
845
846			Returns
847			-------
848			converted value
849
850			Raises
851			-------
852			`ValueError`
853			: if `value` cannot be converted to `target_type`
854
855			`TypeError`
856			: if `target_type` is not a valid type
857
858			Examples
859			--------
860			```python
861			parser = TypeParser()
862			parser.convert("true", bool)  # True
863			parser.convert("2", int)      # 2
864			parser.convert("2", float)    # 2.
865			```
866		"""
867		base, type_args = _decompose_type(target_type)
868		if base == NoneType:
869			return self.parse_none(value)
870		elif base == bool:
871			return self.parse_bool(value)
872		elif base == int:
873			return self.parse_int(value)
874		elif base == Decimal:
875			return self.parse_decimal(value)
876		elif base == float:
877			return self.parse_float(value)
878		elif base == str:
879			return value
880		elif base == Nullable:
881			if self.is_none(value):
882				return None
883			else:
884				if type_args is not  None and len(type_args) == 1 and type_args[0] != str:
885					inner_type = type_args[0]
886					return self.convert(value, inner_type)
887				else:
888					return value
889		elif base == list:
890			subvalues = value.split(self.list_delimiter)
891			if self.trim:
892				subvalues = [subvalue.strip() for subvalue in subvalues]
893			if type_args is not None and len(type_args) == 1 and type_args[0] != str:
894				subtype = type_args[0]
895				return [self.convert(subvalue, subtype) for subvalue in subvalues]
896			else:
897				return subvalues
898		else:
899			raise TypeError(f"cannot convert to type: {target_type}")

Convert a string to the specified target type if possible

Valid values for target_type include any return value from infer(), infer_series() and infer_table(). To infer and convert the string automatically, use parse(), parse_series() or parse_table() instead.

Parameters

value : the string to be converted

target_type : type to which the value should be converted

Returns

converted value

Raises

ValueError : if value cannot be converted to target_type

TypeError : if target_type is not a valid type

Examples

parser = TypeParser()
parser.convert("true", bool)  # True
parser.convert("2", int)      # 2
parser.convert("2", float)    # 2.
def parse( self, value: str) -> Union[str, int, float, decimal.Decimal, bool, NoneType, list]:
902	def parse(self, value: str) -> AnyValue:
903		"""
904			Parse a string and convert it to its underlying type
905
906			Parameters
907			----------
908			`value`
909			: the string to be parsed
910
911			Returns
912			-------
913			converted value
914
915			Examples
916			--------
917			```python
918			parser = TypeParser()
919			parser.parse("true")  # True
920			parser.parse("2.0")   # 2.
921			parser.parse("abc")   # "abc"
922			```
923		"""
924		return self.convert(value, self.infer(value))

Parse a string and convert it to its underlying type

Parameters

value : the string to be parsed

Returns

converted value

Examples

parser = TypeParser()
parser.parse("true")  # True
parser.parse("2.0")   # 2.
parser.parse("abc")   # "abc"
def parse_series( self, values: Iterable[str]) -> list[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, list]]:
927	def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
928		"""
929			Parse a series of strings and convert them to their underlying common type
930
931			If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
932
933			Parameters
934			----------
935			`values`
936			: series of strings to be parsed
937
938			Returns
939			-------
940			converted values
941
942			Examples
943			--------
944			```python
945			parser = TypeParser()
946			parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
947			parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
948			parser.parse_series(["true", "false", ""])  # [True, False, None]
949			parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
950			```
951		"""
952		inferred = self.infer_series(values)
953		return [self.convert(value, inferred) for value in values]

Parse a series of strings and convert them to their underlying common type

If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Parameters

values : series of strings to be parsed

Returns

converted values

Examples

parser = TypeParser()
parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
parser.parse_series(["true", "false", ""])  # [True, False, None]
parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
def parse_table( self, rows: Iterable[Sequence[str]]) -> list[list[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, list]]]:
956	def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
957		"""
958			Parse a table of strings and convert them to the underlying common type of each column
959
960			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
961
962			Note that the type to which the values should be converted is determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.
963
964			This is a function that computes the entire table and returns it all at once. The generator `iterate_table()` behaves analogously, except that it computes and yields each row one at a time.
965
966			Parameters
967			----------
968			`rows`
969			: table of strings to be parsed, in row-major order
970
971			`iterator`
972			: whether the parsed values should be yielded as an iterator. If False, which is the default, the entire table is computed and returned as a list of lists. If True, this function behaves as a generator, and the rows of the table are computed and yielded one at a time. However, note that even when set to True, the type inference requires that inferred type of each individual value must all be able to fit into memory at once.
973
974			Returns
975			-------
976			converted table of values, in row-major order
977
978			Examples
979			--------
980			```python
981			parser = TypeParser()
982			table = parser.parse_table([
983				["1", "5",   "true",  "1"],
984				["2", "6.7", "false", "2.3"],
985				["3", "8.0", "",      "abc"],
986			]):
987			assert table == [
988				[1, 5.,  True,  "1"],
989				[2, 6.7, False, "2.3"],
990				[3, 8.,  None,  "abc"],
991			]
992			```
993		"""
994		return [converted_row for converted_row in self.iterate_table(rows)]

Parse a table of strings and convert them to the underlying common type of each column

For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the type to which the values should be converted is determined by infer_table(), and so the individual inferred types of every value in the table must be able to fit into memory.

This is a function that computes the entire table and returns it all at once. The generator iterate_table() behaves analogously, except that it computes and yields each row one at a time.

Parameters

rows : table of strings to be parsed, in row-major order

Note: this method does not take an iterator argument; the entire table is always computed and returned as a list of lists. To have the rows of the table computed and yielded one at a time, use the generator iterate_table() instead. However, note that even then, the type inference requires that the inferred type of each individual value must all be able to fit into memory at once.

Returns

converted table of values, in row-major order

Examples

parser = TypeParser()
table = parser.parse_table([
	["1", "5",   "true",  "1"],
	["2", "6.7", "false", "2.3"],
	["3", "8.0", "",      "abc"],
])
assert table == [
	[1, 5.,  True,  "1"],
	[2, 6.7, False, "2.3"],
	[3, 8.,  None,  "abc"],
]
def iterate_table( self, rows: Iterable[Sequence[str]]) -> Iterator[list[Union[str, int, float, decimal.Decimal, bool, NoneType, list]]]:
	def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
		"""
			Parse a table of strings for the underlying common type of each column, then convert and yield each row

			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.

			This is a generator that computes and yields each row one at a time. The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type to which the values should be converted is still determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.

			The table is traversed twice: once to infer the column types and once to convert each row. If `rows` is a one-shot iterator (for example a generator or a `csv.reader`), it is first collected into a list so that the conversion pass still sees every row; in that case the whole table of strings is held in memory as well. Re-iterable inputs such as lists are iterated twice without being copied.

			Parameters
			----------
			`rows`
			: table of strings to be parsed, in row-major order

			Yields
			------
			each row of converted table values

			Examples
			--------
			```python
			parser = TypeParser()
			table = parser.iterate_table([
				["1",   "true",  "1"],
				["2",   "false", "2.3"],
				["3.4", "2",     "abc"],
			])
			assert next(table) == [1.,  1, "1"]
			assert next(table) == [2.,  0, "2.3"]
			assert next(table) == [3.4, 2, "abc"]
			```
		"""
		if iter(rows) is rows:
			# An iterator returns itself from iter() and can only be consumed once;
			# infer_table() would exhaust it and no rows would be left to convert
			rows = list(rows)

		inferred_types = self.infer_table(rows)

		for row in rows:
			# zip() pairs each cell with its column type and stops at the shorter of the two
			yield [self.convert(value, inferred) for value, inferred in zip(row, inferred_types)]

Parse a table of strings for the underlying common type of each column, then convert and yield each row

For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

This is a generator that computes and yields each row one at a time. The function parse_table() behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type to which the values should be converted is still determined by infer_table(), and so the individual inferred types of every value in the table must be able to fit into memory.

Parameters

rows : table of strings to be parsed, in row-major order

Yields

each row of converted table values

Examples

parser = TypeParser()
table = parser.iterate_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
assert next(table) == [1.,  1, "1"]
assert next(table) == [2.,  0, "2.3"]
assert next(table) == [3.4, 2, "abc"]
Inherited Members
builtins.object
__new__
__repr__
__hash__
__str__
__getattribute__
__setattr__
__delattr__
__lt__
__le__
__eq__
__ne__
__gt__
__ge__
__reduce_ex__
__reduce__
__getstate__
__subclasshook__
__init_subclass__
__format__
__sizeof__
__dir__
def reduce_types( types: Iterable[Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]:
def reduce_types(types: Iterable[AnyValueType]) -> AnyValueType:
	"""
		Reduce multiple types into a single common type.

		If the input types are not all the same, the resulting type will be the narrowest possible type that will encompass all of the input types. An empty input reduces to `GenericValue`.

		This operation is useful in cases such as parsing a CSV file where each column should have a consistent type, but where the individual values in a column could be interpreted variously as ints or floats (or other types).

		Parameters
		----------
		`types`
		: types to be reduced

		Returns
		-------
		common reduced type

		Examples
		--------
		```python
		reduce_types([int, float])        # float
		reduce_types([bool, int])         # int
		reduce_types([int, float, str])   # str
		```
	"""
	common: Union[AnyValueType, None] = None
	for candidate in types:
		if common is None:
			# First type seen: it is the running result so far
			common = candidate
		elif candidate != common:
			common = _merge_types(common, candidate)

		if common == _TerminalValue:
			# Short-circuit: once the terminal value is reached, the remaining types are not examined
			return _TerminalValue

	# common is still None only when no types were supplied
	return GenericValue if common is None else common

Reduce multiple types into a single common type.

If the input types are not all the same, the resulting type will be the narrowest possible type that will encompass all of the input types.

This operation is useful in cases such as parsing a CSV file where each column should have a consistent type, but where the individual values in a column could be interpreted variously as ints or floats (or other types).

Parameters

types : types to be reduced

Returns

common reduced type

Examples

reduce_types([int, float])        # float
reduce_types([bool, int])         # int
reduce_types([int, float, str])   # str