Source code for ase2sprkkr.outputs.test.grammar_types.arrays

""" Grammar types for array, sequences and tables """
from collections import namedtuple
import numpy as np
import pyparsing as pp
import itertools
import copy
from typing import List, Optional, Union
from ..alternative_types import numpy_types

from ..grammar import generate_grammar, delimitedList, \
                      line_end, White
from .grammar_type import GrammarType, compare_numpy_values, type_from_type, \
                          type_from_default_value, TypedGrammarType
from ..decorators import add_to_signature, cached_property
from .basic import Integer, Real, Unsigned, real


[docs] class Array(GrammarType): """ A (numpy) array of values of one type """ delimiter=White(' \t').suppress() delimiter_str = ' ' array_access = True
[docs] def __init__(self, type, default_value=None, length=None, max_length=None, min_length=None, as_list=False, format=None, **kwargs): """ Parameters ---------- type The grammar type of the values in the list (it can be given by a python type) default_value The default value for the list length If it is set, the list have to have just this length (it sets ``min_`` and ``max_length`` to the ``length``) min_length The minimal allowed length of the list. max_length The maximal allowed length of the list. as_list Type of the value array. True means List, False means np.ndarray, or custom type (e.g. tuple) can be provided. However, the value can be set using tuple or list anyway. """ if isinstance(type, (list, np.ndarray)): if default_value is not None: raise ValueError("It is not possible for an Array to provide default_value both in 'default_value' and in 'type' argument") default_value = type type = type[0].__class__ self.type = type_from_type(type) if format is not None: self.type = self.type.copy() self.type.format = format self.as_list = as_list super().__init__(default_value=default_value, **kwargs) self.min_length = min_length or length self.max_length = max_length or length with generate_grammar(): grammar = self.type.grammar() grammar = delimitedList(grammar, self.delimiter) self._set_convert_action(grammar) grammar.setName(self.grammar_name()) self._grammar = grammar
[docs] def _set_convert_action(self, grammar): if self.as_list: if callable(self.as_list): grammar = grammar.setParseAction(lambda x: self.as_list(x.asList())) else: grammar = grammar.setParseAction(lambda x: [x.asList()]) else: grammar.setParseAction(lambda x: self.convert(x.asList()))
def __str__(self): if self.min_length == self.max_length: if self.min_length: length = f' of length {self.min_length}' else: length = '' else: if self.min_length is not None: length='{self.min_length}<=n' else: length='n' if self.max_length is not None: length+=f'<={self.max_length}' length=' with length ' return f"Array(of {self.type}{length})"
[docs] def grammar_name(self): gn = self.type.grammar_name() if self.min_length is not None and self.min_length == self.max_length: return f'{self.min_length}*{gn}' return f'{gn}{self.delimiter_str}{gn}{self.delimiter_str}...'
[docs] def _string(self, val): it = iter(val) i = next(it) out = self.type.string(i) for i in it: out += self.delimiter_str out += self.type.string(i) return out
[docs] def _validate(self, value, why='set'): if callable(self.as_list): cls = self.as_list elif self.as_list: cls = list else: cls = np.ndarray if not isinstance(value, cls): return f'A value of the {cls} type is required, a {value.__class__} is given' for i,v in enumerate(value): try: self.type.validate(v, why='set') except ValueError as e: raise ValueError("Value {} in the set is incorrect: {}".format(i, str(e))) from e if self.min_length is not None and len(value) < self.min_length: return f"The array should be at least {self.min_length} items long, it has {len(value)} items" if self.max_length is not None and len(value) > self.max_length: return f"The array can not have more than {self.max_length} items, it has {len(value)} items" return True
@cached_property def _dtype_condition(self): if np.__version__ >= '1.23': return lambda type, shape: True if np.__version__ >= '1.20': return lambda type, shape: type != object else: return lambda type, shape: type != object and not len(shape)
[docs] def convert(self, value): if self.as_list: if callable(self.as_list): return value if isinstance(value, self.as_list) else self.as_list(value) else: return list(value) if isinstance(value, tuple) else value type, shape = self.type.numpy_dtype() type = numpy_types.get(type,type) if not isinstance(value, np.ndarray): if not hasattr(value, '__iter__'): value = [ value ] ln=1 else: ln=len(value) if self._dtype_condition(type, shape): def validate(v): self.type.validate(v) return v value = ( validate(self.type.convert(i)) for i in value) out = np.fromiter(value, dtype = (type, shape), count=ln) else: value = [ self.type.convert(i) for i in value ] out = np.asarray(value) return out elif type is not value.dtype and np.dtype(type) is not value.dtype: for i in value: self.type.validate(self.type.convert(i)) # check conversion of only first row is sufficient break value = value.astype((type, shape)) return value
is_the_same_value = staticmethod(compare_numpy_values) @property def is_numpy_array(self): return not self.as_list
[docs] def numpy_dtype(self): dtype, shape = self.type.numpy_dtype() if self.min_length and self.min_length == self.max_length: return dtype, (self.min_length,) + shape return object
[docs] class SetOf(Array): """ Set of values of the same type. E.g. {1,2,3} """ delimiter = pp.Suppress(pp.Literal(',') | pp.Literal(';') | White(' \t')).setName('[,; ]') delimiter_str = ','
[docs] @add_to_signature(Array.__init__) def __init__(self, type, *args, **kwargs): kwargs.setdefault('prefix', '{') kwargs.setdefault('postfix', '}') super().__init__(type, *args, **kwargs)
[docs] def transform_grammar(self, grammar, param_name=False): return grammar | self.type.grammar(param_name).copy().addParseAction(lambda x: np.atleast_1d(x.asList()))
[docs] def copy_value(self, value): return copy.deepcopy(value)
[docs] class Complex(SetOf, TypedGrammarType): array_access = False numpy_type = complex
[docs] @add_to_signature(SetOf.__init__) def __init__(self, *args, **kwargs): super().__init__(Real.I, *args, as_list=complex, length=2, **kwargs)
[docs] def convert(self, value): return complex(value)
[docs] def _validate(self, value, why='set'): return isinstance(value, (complex, np.complexfloating)) or 'A complex value required, {value} given.'
[docs] def _grammar_name(self): return '{complex (as 2 reals)}'
[docs] def _string(self, val): return real._string(val.real) + ' ' + real._string(val.imag)
__str__ = GrammarType.__str__
[docs] class Sequence(GrammarType): """ A sequence of values of given types """
[docs] def __init__(self, *types, format='', format_all=False, allowed_values=None, default_values=False, names=None, as_list=False, **kwargs): super().__init__(**kwargs) if names: self.names = names if isinstance(names, dict) else {name:i for i,name in enumerate(names)} self.value_type = namedtuple("_".join(names), names) self.value_constructor = self.value_type self.value_constructor = self.value_type else: self.names = None if as_list: self.value_type = list self.value_constructor = lambda *x: [[*x]] else: self.value_type = tuple self.value_constructor = lambda *x: [ tuple(x) ] if isinstance(format, dict): format = { type_from_type(k):v for k,v in format.items() } if isinstance(format, (str, dict)): format = itertools.repeat(format) self.types = [ type_from_default_value(i, dfs, format_all=format_all) for i,dfs in zip(types, format) ] if allowed_values and not isinstance(allowed_values, set): allowed_values = set(allowed_values) self.allowed_values=allowed_values self.default_values=default_values
def __len__(self): return len(self.types) def __bool__(self): return True
[docs] def _grammar(self, param_name = False): def grm(type): g = type.grammar(param_name) if self.default_values and type.default_value is not None: g = g | pp.Empty().setParseAction(lambda x: type.default_value) return g grammars = [grm(i) for i in self.types] grammar = pp.And(grammars).setParseAction(lambda x: self.value_constructor(*x)) if self.allowed_values is not None: grammar.addConditionEx(lambda x: x[0] in self.allowed_values, lambda x: f'{x[0]} is not in the list of allowed values') return grammar
[docs] def _validate(self, value, why='set'): if not isinstance(value, (self.value_type)) or len(value) != len(self.types): return f'A tuple of {len(self.types)} values is required' for i,j in zip(self.types, value): i.validate(j, why=why) return True
[docs] def convert(self, value): if not isinstance(value, self.value_type): return self.value_constructor(*value) try: return self.value_constructor(*value) except TypeError: pass return value
[docs] def grammar_name(self): return " ".join( (f'{j.grammar_name()}' for j in self.types) )
[docs] def _string(self, val): out = [] for i,v in zip(self.types, val): out.append(' ') out.append(i.string(v)) return ''.join(out)
[docs] def enrich(self, option): class cls(option.__class__): def _get_index(sulf, name): if self.names and isinstance(name, str): return self.names[name] return name def __getitem__(self, key): key = self._get_index(key) return self()[key] def __setitem__(self, key, value): key = self._get_index(key) v = list(self()) v[key] = value self.set(v) if self.names: for n,i in self.names.items(): (lambda i: setattr(cls, n, property( lambda self: self[i], lambda self, v: self.__setitem__(i, v) )))(i) option.__class__ = cls
is_the_same_value = staticmethod(compare_numpy_values)
[docs] class SpecialColumn: """ A special column for :class:`Table` """
[docs] def __init__(self, table, column, label, format, *, default_format='<4'): if column.__class__ is str: label=column column=True column = Unsigned.I if column is True else column if column: if format and not (format is True and column.format): if format is True: format = default_format column = column.copy() column.format = format self.column = column self.label = label self.format = format
def __bool__(self): return bool(self.column)
[docs] def add_grammar(self, grammar): return self.column.grammar() + grammar
[docs] def add_header_grammar(self, grammar): if self.label: grammar = pp.CaselessKeyword(self.label) + grammar return grammar
[docs] def format_string(self, val): if not self.column: return '' return self.column.format_string(val)
[docs] def header(self): return self.format_string(self.label or '') + ' '
def __repr__(self): if not self.column: return "<Special column not present>" return f"<Special column{' ' if self.label else ''}{self.label} of type {self.column}>"
[docs] class Table(GrammarType): """ Table, optionaly with named columns, e.g. ::text IQ IREFQ IMQ NOQ ITOQ CONC 1 1 1 1 1 1.000 2 2 2 1 2 1.000 """ array_access = True name_in_grammar = False
[docs] def __init__(self, columns:List[GrammarType]=None, header:Optional[bool]=None, free_header=False, format = {float: '>22.14', None: '>16'}, format_all=True, numbering:Union[str,bool,GrammarType]=None, numbering_label=None, numbering_format=True, grouping=False, grouping_label=None, grouping_format=True, prefix=None, postfix=None, length=None, row_condition=None, flatten=False, default_values=False, named_result=None, group_size=None, group_size_format="{:<12}{}", groups_as_list=None, **kwargs): """ Parameters ---------- columns List of GrammarTypes that describes the columns of the table header Whether table will have header. Default None means True, if the names are specified free_header Do not require the exact content of the header, just skip one line during parsing format Pass this argument to the :class:`Sequence` describing the line format_string Pass this argument to the :class:`Sequence` describing the line numbering There will be one extra column on the begining of the table, with numbering starting from one. String argument means the label of the column. numbering_label The label of the numbering column (if not given in numbering). numbering_format Format for the numering column grouping If True, the data are not one table, but list of tables. Numbering then numbers the tables, not the rows of the tables. There will be one extra column which numbers rows within the table. grouping_label Similiar as `numbering_label` grouping_format Similiard as `numbering_format` prefix Prints the prefix before the table postfix Prints the postfix after the table length The table length (or the number of groups) row_condition The condition, that each row of the table should have satisfied flatten The resulting table will have one dimension. Is meaningful for table without named columns, with a same type of all columns named_result The resulting table will have numpy structured dtype with named columns. Default None means autodetection: it behaves as True if necessary, i.e.: if there are at least two columns with different types. group_size If grouping, there will be another line after the header (SPRKKR format of tables), which contains ``{NAME} {VALUE}``. The name should be specified in group_size. The value is the number of rows, which all subtables are required to have. group_size_format Format for the group_size line. groups_as_list If True - groups are contained in list If False - groups are contained in np.ndarray If None - True if group_size is defined (and thus if it is possible) kwargs Columns and their names can be assigned as kwargs, e.g. ``column1_name = float, column2_name = int, ...`` """ if columns is None: columns = kwargs kwargs = {} super().__init__(prefix=None, postfix=None, **kwargs) if isinstance(columns, dict): self.names = list(columns.keys()) columns = columns.values() else: self.names = None if header is None: header = bool(self.names) self.header = header if group_size: if not grouping: grouping=True self.group_size = group_size self.group_size_format = group_size_format self.sequence = Sequence(*columns, format=format, format_all=format_all, condition = row_condition, default_values=default_values) self.flatten = flatten self.free_header = free_header self.named_result = named_result self.length = length self.numbering = SpecialColumn(self, numbering, numbering_label, numbering_format) self.grouping = SpecialColumn(self, grouping, grouping_label, grouping_format) self.groups_as_list = bool(self.grouping) and ( groups_as_list if groups_as_list is not None else not group_size ) if self.grouping and not self.groups_as_list and not group_size: raise ValueError("Groups have to be returned as list, if there is not fixed number" " of items in a group")
[docs] def special_columns(self): if self.grouping: yield self.grouping if self.numbering: yield self.numbering
[docs] def _grammar(self, param_name=False): line = self.sequence.grammar(param_name) cols = 1 for i in self.special_columns(): cols+=1 line = i.add_grammar(line) grammar = delimitedList(line, line_end) if self.group_size: def set_g_size(x): grp_size.group_size = x[0] grp_size = Unsigned.I.grammar().copy().setParseAction(set_g_size) grammar = pp.Suppress(pp.CaselessKeyword(self.group_size) + grp_size + "\n") + grammar if self.names: if self.free_header: fh = pp.SkipTo(line_end) + line_end if callable(self.free_header): fh.addConditionEx(lambda x: self.free_header(x[0]), lambda x: f"This is not an allowed header for table {param_name}: {x[0]}" ) grammar = pp.Suppress(fh) + grammar else: def names(): for n in self.names: if ' ' in n: """ multiple column headers for one column are allowed -- see Occupation section""" yield from map(pp.CaselessKeyword, n.split(' ')) elif n != '': yield pp.CaselessKeyword(n) header = pp.And(list(names())) for i in self.special_columns(): header = i.add_header_grammar(header) grammar = pp.Suppress(header + pp.lineEnd) + grammar def data_numbering(s, loc, x): numbers = x[::2] datas = x[1::2] if not numbers == [*range(1, len(numbers) + 1)]: raise pp.ParseException(s, loc, 'The first column should contain row numbering') return tabelize(datas) def data_grouping(s, loc, data): out = [] curr = [] c = 0 if len(data) % (2 * grp_size.group_size): raise pp.ParseException(s, loc, "The data in the table should be grouped " f"by groups of length {grp_size.group_size}.") for i in range(0, len(data), 2): c+=1 if c>grp_size.group_size: out.append(tabelize(curr)) curr = [] c = 1 if data[i] != c: raise pp.ParseException(s, loc, "The value in the first column should " "be a row numbering within a group of a length {grp_size.group_size}." f"{data[i]} encountered, {c} expected.") curr.append(data[i + 1]) out.append(tabelize(curr)) if self.groups_as_list: return [ out ] else: return np.asarray(out, dtype = self._numpy_type) def data_numbering_grouping(s, loc, data): if len(data) == 0: return data def numbering_error(i, expected): raise pp.ParseException(s, loc, "The value in the first column should " "be a monotone numbering of a groups. " "On the {i//3+1}th row, {data[i]} encountered, {expected} expected.") val = data[0] if val != 1: numbering_error(0, "1") out = [] curr = [] grp = 0 for i in range(0, len(data), 3): if data[i] !=val: if data[i] != val + 1: numbering_error(i, f"'{val}' or '{val+1}'") out.append(tabelize(curr)) curr = [] val = data[i] grp = 0 grp += 1 if data[i + 1] != grp: raise pp.ParseException(s, loc, "The value in the second column should " "be a row numbering within a group. " f"{data[1+1]} encountered, {grp} expected.") curr.append(data[i + 2]) out.append(tabelize(curr)) if self.group_size: for i,g in enumerate(out): if len(g) != grp_size.group_size: raise pp.ParseException(s, loc, f"The group {i//3 + 1} should have " f"{grp_size.group_size} members, it has {len(g)}.") if self.groups_as_list: return [ out ] else: return np.asarray(out, dtype = self._numpy_type) def tabelize(x): out = np.array(x, dtype=self._numpy_type) if self.flatten: out = out.ravel() return out if self.numbering: if self.grouping: grammar.addParseAction(data_numbering_grouping) else: grammar.addParseAction(data_numbering) elif self.grouping: grammar.addParseAction(data_grouping) else: grammar.addParseAction(tabelize) return grammar
[docs] def _string(self, data): out = [] if self.header: names = ((i[1] if isinstance(i, tuple) else i) for i in self.names) formated = ( t.format_string(n) for n,t in zip(names, self.sequence.types) ) header = ' '.join(formated) for i in self.special_columns(): header = i.header() + header out.append(header) if self.group_size: out.append(self.group_size_format.format(self.group_size, len(data[0]) if data is not None and len(data) else 1)) def line(row, prefix): line = self.sequence.string(row) if prefix: line = prefix + line out.append(line) def unflatten(val): if val.ndim == 1 and not isinstance(self._numpy_type, list): ln = len(self.sequence) val = val.reshape(len(val) // ln, ln) return val if self.grouping: for n, group in enumerate(data, 1): for g, d in enumerate(unflatten(group), 1): line(d, self.grouping.format_string(n) + self.numbering.format_string(g)) else: for n, d in enumerate(unflatten(data),1): line(d, self.numbering.format_string(n)) return '\n'.join(out)
[docs] def _validate(self, value, why='set'): def validate(value): if not isinstance(value, np.ndarray): return f"Numpy array as a value required {value.__class__} given" dtype = self._numpy_type dim = 1 if isinstance(dtype, list) or self.flatten else 2 if len(value.shape) != dim: return f"The array should have dimension={dim}, it has dimension {len(value.shape)}" if value.dtype != dtype: return f"The data type of the value should be {dtype}, it is {value.dtype}" if dim==2 and value.shape[1] != len(self.sequence.types): return f"The array is required to have {len(self.sequence.types)} columns, it has {value.shape[1]}" return True if not self.grouping: out = validate(value) if out is not True: return out else: if not isinstance(value, (list, np.ndarray)): return "The grouped table accepts list of table data as a value, "\ f"{value.__class__} have been given." for i,d in enumerate(value): out = validate(d) if out is not True: return f"The {i+1}th table has invalid data: {out}" size = None if self.group_size: for i, d in enumerate(value): if size is None: size = len(d) else: if size != len(d): return f"Size of the {i+1}th table should be same as of the first one, i.e. {size}" if self.length is not None and self.length != len(value): return f"The array is required to have {self.length} rows, it has {len(value)}" return True
[docs] def convert(self, value): if self.groups_as_list: value = [ np.asarray(v, dtype = self._numpy_type) for v in value ] return np.asarray(value, dtype = self._numpy_type)
@cached_property def numpy_type(self): if self.groups_as_list: return object return self._numpy_type @cached_property def _numpy_type(self): types = self.sequence.types if not self.named_result: dtype = types[0].numpy_type for t in types[1:]: if t.numpy_type != dtype: if self.named_result is False: raise ValueError("The table should not have structured dtype, but has columns have different types") break else: return dtype names = self.names or itertools.repeat('') return list(zip(names, (i.numpy_type for i in types)))
[docs] def number_of_columns(self): return len(self.sequence.types)
[docs] def zero_data(self, length): """ Return array of zeros with the given number of rows and with the dtype of the table """ dtype = self.numpy_type if isinstance(dtype, list): return np.zeros(length, dtype) else: return np.zeros((length, self.number_of_columns()), dtype)
[docs] def grammar_name(self): if self.names: data = " ".join( (f'{i}:{j.grammar_name()}' for i,j in zip(self.names, self.sequence.types) ) ) else: data = self.sequence.grammar_name() return f"<TABLE of {data}>"
is_the_same_value = staticmethod(compare_numpy_values)
set_of_integers = SetOf(Integer.I) """ A standard grammar type instance for array of integers (of any length, used by variant types) """ set_of_reals = SetOf(Real.I) """ A standard grammar type instance for array of reals (of any length, used by variant types) """ complex_number = Complex.I = Complex() # NOQA E741 """ A standard grammar type instance for complex numbers """