Source code for cis_interface.yamlfile

import os
import pystache
import yaml
from cis_interface import backwards


def load_yaml(fname):
    r"""Parse a yaml file defining a run.

    The raw file text is rendered with pystache using the current environment
    variables as the template context, and the rendered text is then parsed
    with ``yaml.safe_load``.

    Args:
        fname (str): Path to the yaml file.

    Raises:
        IOError: If fname does not point to an existing file.
        TypeError: If the top level of the yaml document is not a mapping.

    Returns:
        dict: Contents of yaml file with an added 'workingDir' entry set to
            the directory containing the (resolved) file. An empty file
            yields a dictionary holding only 'workingDir'.

    """
    fname = os.path.realpath(fname)
    if not os.path.isfile(fname):
        raise IOError("Unable locate yaml file %s" % fname)
    # Read the raw text; the file only needs to stay open for the read
    with open(fname, 'r') as f:
        raw_text = f.read()
    # Mustache replace vars
    # NOTE(review): the StringIO round trip looks redundant, but it is kept
    # because backwards.StringIO may differ between Python versions - confirm
    # before removing.
    rendered = pystache.render(
        backwards.StringIO(raw_text).getvalue(), dict(os.environ))
    yamlparsed = yaml.safe_load(rendered)
    # safe_load returns None for an empty document; treat that as an empty
    # mapping so the item assignment below does not fail.
    if yamlparsed is None:
        yamlparsed = {}
    if not isinstance(yamlparsed, dict):
        raise TypeError("Top level of yaml file %s must be a mapping, not %s."
                        % (fname, type(yamlparsed).__name__))
    yamlparsed['workingDir'] = os.path.dirname(fname)
    return yamlparsed
def parse_yaml(files):
    r"""Parse one or more yaml files into a dictionary of components.

    Every file is loaded first. All model entries from all files are then
    parsed before any connection entries, so that connections can refer to
    channels registered by the models.

    Args:
        files (str, list): Either the path to a single yaml file or a list of
            yaml files.

    Raises:
        RuntimeError: If, after parsing, an input or output channel has no
            'driver' entry.

    Returns:
        dict: Components parsed from the yamls, keyed by 'input', 'output',
            'model', and 'connection'.

    """
    components = dict(input={}, output={}, model={}, connection={})
    if isinstance(files, str):
        files = [files]
    # Load each file
    loaded = [load_yaml(path) for path in files]
    # Parse models, then connections to ensure connections can be processed
    for plural in ['models', 'connections']:
        singular = plural[:-1]
        for doc in loaded:
            # Normalize the plural entry so that it is always a list
            if plural not in doc:
                doc[plural] = []
            elif not isinstance(doc[plural], list):
                doc[plural] = [doc[plural]]
            entries = doc[plural]
            # Fold a singular entry ('model'/'connection') into the list
            if singular in doc:
                extra = doc.pop(singular)
                if isinstance(extra, list):
                    entries.extend(extra)
                else:
                    entries.append(extra)
            for entry in entries:
                components = parse_component(entry, singular,
                                             doc['workingDir'],
                                             existing=components)
    # Make sure that I/O channels initialized
    for direction in ['input', 'output']:
        for chan_name, chan in components[direction].items():
            if 'driver' not in chan:
                raise RuntimeError("No driver established for %s channel %s" % (
                    direction, chan_name))
    return components
def parse_component(yml, ctype, yamldir, existing=None):
    r"""Parse a yaml entry for a component, adding it to the list of
    existing components.

    Args:
        yml (dict): YAML dictionary for a component.
        ctype (str): Component type. This can be 'input', 'output', 'model',
            or 'connection'.
        yamldir (str): Full path to directory containing the yaml this
            component was in.
        existing (dict, optional): Dictionary of existing components.
            Defaults to a dictionary with an empty entry for each component
            type.

    Raises:
        TypeError: If yml is not a dictionary.
        ValueError: If ctype is not 'input', 'output', 'model', or
            'connection'.
        RuntimeError: If the yml dictionary is missing a required keyword
            (raised by the type specific parser).
        ValueError: If the component already exists.

    Returns:
        dict: All components identified.

    """
    if not isinstance(yml, dict):
        raise TypeError("Component entry in yml must be a dictionary.")
    ctype_list = ['input', 'output', 'model', 'connection']
    if existing is None:
        existing = {k: {} for k in ctype_list}
    if ctype not in ctype_list:
        # Interpolate the offending type; a one-tuple keeps the formatting
        # correct even if ctype itself happens to be a tuple.
        raise ValueError("'%s' is not a recognized component." % (ctype,))
    # Parse based on type
    if ctype == 'model':
        existing = parse_model(yml, yamldir, existing)
    elif ctype in ['input', 'output']:
        existing = parse_io(yml, yamldir, existing)
    elif ctype == 'connection':
        existing = parse_connection(yml, yamldir, existing)
    yml['workingDir'] = yamldir
    # Ensure component doesn't already exist. This check runs after the type
    # specific parser because parse_connection is what assigns yml['name']
    # for connections.
    if yml['name'] in existing[ctype]:
        raise ValueError("%s is already a registered '%s' component." % (
            yml['name'], ctype))
    existing[ctype][yml['name']] = yml
    return existing
def parse_model(yml, yamldir, existing):
    r"""Parse a yaml entry for a model and register its I/O channels.

    The 'inputs' and 'outputs' entries are normalized to lists of
    dictionaries (singular 'input'/'output' entries are merged in and bare
    strings become ``{'name': <string>}``). A server channel is added to the
    inputs when 'is_server' is set, and one client channel per server is
    added to the outputs when 'client_of' is set. Each channel is then
    registered through parse_component.

    Args:
        yml (dict): YAML dictionary for a model. It is modified in place.
        yamldir (str): Full path to directory containing the yaml this
            component was in.
        existing (dict): Dictionary of existing components.

    Raises:
        RuntimeError: If the yml dictionary is missing a required keyword
            ('name', 'driver', or 'args').

    Returns:
        dict: Updated log of all entries.

    """
    for key in ('name', 'driver', 'args'):
        if key not in yml:
            raise RuntimeError(("The yml specs for component '%s' is missing "
                                + "required keyword '%s'.") % (
                                    yml.get('name', None), key))
    model_name = yml['name']
    # Init I/O channels
    for plural in ('inputs', 'outputs'):
        singular = plural[:-1]
        if plural not in yml:
            yml[plural] = []
        elif not isinstance(yml[plural], list):
            yml[plural] = [yml[plural]]
        channels = yml[plural]
        # Merge a singular 'input'/'output' entry into the list
        if singular in yml:
            extra = yml.pop(singular)
            if isinstance(extra, list):
                channels.extend(extra)
            else:
                channels.append(extra)
        # Bare strings are shorthand for a channel with only a name;
        # replaced in place so the list object itself is preserved.
        for idx, chan in enumerate(channels):
            if isinstance(chan, str):
                channels[idx] = dict(name=chan)
    # Add server driver
    if yml.get('is_server', False):
        yml['inputs'].append({'name': model_name,
                              'driver': 'ServerDriver',
                              'args': model_name + '_SERVER'})
        yml['clients'] = []
    # Add client driver
    if yml.get('client_of', []):
        servers = yml['client_of']
        if isinstance(servers, str):
            servers = [servers]
        yml['client_of'] = servers
        for server in servers:
            yml['outputs'].append({'name': '%s_%s' % (server, model_name),
                                   'driver': 'ClientDriver',
                                   'args': server + '_SERVER'})
    # Model index and I/O channels
    yml['model_index'] = len(existing['model'])
    for plural in ('inputs', 'outputs'):
        for chan in yml[plural]:
            chan['model_driver'] = model_name
            existing = parse_component(chan, plural[:-1], yamldir,
                                       existing=existing)
    return existing
def parse_io(yml, yamldir, existing):
    r"""Validate a yaml entry for an I/O channel.

    A channel must always have a 'name'. If it declares a 'driver', it must
    also provide 'args'. Nothing is registered here; the dictionary of
    existing components is returned unchanged.

    Args:
        yml (dict): YAML dictionary for an I/O channel.
        yamldir (str): Full path to directory containing the yaml this
            component was in. Not used by this function.
        existing (dict): Dictionary of existing components.

    Raises:
        RuntimeError: If the yml dictionary is missing a required keyword.

    Returns:
        dict: Updated log of all entries.

    """
    required = ['name']
    if 'driver' in yml:
        required.extend(['driver', 'args'])
    # Report only the first missing keyword, in the order checked above
    missing = [key for key in required if key not in yml]
    if missing:
        raise RuntimeError(("The yml specs for component '%s' is missing "
                            + "required keyword '%s'.") % (
                                yml.get('name', None), missing[0]))
    return existing
def parse_connection(yml, yamldir, existing):
    r"""Parse a yaml entry for a connection between I/O channels.

    Three cases are handled, depending on which side names a registered
    channel:

    * 'input' is not a registered model output: it is treated as a file
      (relative to yamldir) feeding the model input named by 'output'.
    * 'output' is not a registered model input: it is treated as a file
      receiving the model output named by 'input'.
    * Otherwise the model output and model input are linked directly.

    Remaining keywords in yml are copied onto the input channel (or onto the
    output channel for file output), and yml['name'] is set to the
    connection's args.

    Args:
        yml (dict): YAML dictionary for a connection. It is modified in
            place.
        yamldir (str): Full path to directory containing the yaml this
            component was in.
        existing (dict): Dictionary of existing components.

    Raises:
        RuntimeError: If the yml dictionary is missing a required keyword.
        AssertionError: If the 'input' or 'output' entry is not a string.
        RuntimeError: If the 'input' entry is not a model output or file.
        RuntimeError: If neither the 'input' or 'output' entries correspond
            to model I/O channels.
        ValueError: If the 'input' is a file and 'read_meth' entry is not
            'all', 'line', 'table', 'table_array', or 'pandas'.
        ValueError: If the 'output' is a file and 'write_meth' entry is not
            'all', 'line', 'table', 'table_array', or 'pandas'.

    Returns:
        dict: Updated log of all entries.

    """
    # (method, driver, sets as_array) for each supported file method
    read_drivers = (('all', 'FileInputDriver', False),
                    ('line', 'AsciiFileInputDriver', False),
                    ('table', 'AsciiTableInputDriver', False),
                    ('table_array', 'AsciiTableInputDriver', True),
                    ('pandas', 'PandasFileInputDriver', False))
    write_drivers = (('all', 'FileOutputDriver', False),
                     ('line', 'AsciiFileOutputDriver', False),
                     ('table', 'AsciiTableOutputDriver', False),
                     ('table_array', 'AsciiTableOutputDriver', True),
                     ('pandas', 'PandasFileOutputDriver', False))
    comm_fields = ['format_str', 'field_names', 'field_units']

    def _select_file_driver(channel, method, table, error_fmt):
        # Set the file driver on the channel for the requested method
        for known, driver, as_array in table:
            if method == known:
                channel['driver'] = driver
                if as_array:
                    channel['as_array'] = True
                return
        raise ValueError(error_fmt % method)

    def _route_comm_kws(channel, kws_key):
        # Copy comm related keywords into the channel's comm keyword dict
        channel.setdefault(kws_key, dict())
        for field in comm_fields:
            if field in channel:
                value = channel[field]
                if field in ['field_names', 'field_units']:
                    channel[kws_key].setdefault(
                        field, [n.strip() for n in value.split(',')])
                else:
                    channel[kws_key].setdefault(field, value)

    for key in ['input', 'output']:
        if key not in yml:
            raise RuntimeError(("The yml specs for component '%s' is missing "
                                + "required keyword '%s'.") % (
                                    yml.get('name', None), key))
    assert isinstance(yml['input'], str)
    assert isinstance(yml['output'], str)
    in_name = yml.pop('input')
    out_name = yml.pop('output')
    if in_name not in existing['output']:
        # File input
        in_path = os.path.realpath(os.path.join(yamldir, in_name))
        if not os.path.isfile(in_path):
            raise RuntimeError(("Input '%s' not found in any of the registered "
                                + "model outputs and is not a file.") % in_name)
        if out_name not in existing['input']:
            raise RuntimeError(("Output '%s' not found in any of the model "
                                + "inputs and cannot be a file.") % out_name)
        args = in_path
        xo = None
        xi = existing['input'][out_name]
        xi['args'] = args
        _select_file_driver(xi, yml.pop('read_meth', 'all'), read_drivers,
                            "Invalid read_meth '%s'.")
    elif out_name not in existing['input']:
        # File output
        xi = None
        xo = existing['output'][in_name]
        in_temp = xo.get('in_temp', yml.get('in_temp', 'False'))
        if isinstance(in_temp, backwards.string_types):
            # NOTE(review): eval is applied to a string taken from the yaml
            # or the channel entry; this executes arbitrary expressions if
            # the yaml is not trusted.
            in_temp = eval(in_temp)
        if in_temp:
            # The name is used as given (not resolved against yamldir)
            args = out_name
            xo['in_temp'] = True
        else:
            args = os.path.realpath(os.path.join(yamldir, out_name))
        xo['args'] = args
        _select_file_driver(xo, yml.pop('write_meth', 'all'), write_drivers,
                            "Invalid write_meth '%s'.")
    else:
        # Generic Input/Output
        # TODO: Use RMQ drivers when models are on different machines
        args = '%s_to_%s' % (in_name, out_name)
        xo = existing['output'][in_name]
        xo['args'] = args
        xo['driver'] = 'OutputDriver'
        xi = existing['input'][out_name]
        xi['args'] = args
        xi['driver'] = 'InputDriver'
    # Transfer connection keywords to one connection driver; the input side
    # takes them whenever it exists.
    receiver = xo if xi is None else xi
    receiver.update(**yml)
    yml['name'] = args
    # Direct comm keywords to input/output
    if xi is not None:
        _route_comm_kws(xi, 'icomm_kws')
    if xo is not None:
        _route_comm_kws(xo, 'ocomm_kws')
    return existing