Source code for cis_interface.yamlfile

import os
import pprint
import pystache
import yaml
from cis_interface import backwards
from cis_interface.schema import get_schema


[docs]def load_yaml(fname): r"""Parse a yaml file defining a run. Args: fname (str): Path to the yaml file. Returns: dict: Contents of yaml file. """ fname = os.path.realpath(fname) if not os.path.isfile(fname): raise IOError("Unable locate yaml file %s" % fname) # Open file and parse yaml with open(fname, 'r') as f: # Mustache replace vars yamlparsed = f.read() yamlparsed = pystache.render( backwards.StringIO(yamlparsed).getvalue(), dict(os.environ)) yamlparsed = yaml.safe_load(yamlparsed) yamlparsed['working_dir'] = os.path.dirname(fname) return yamlparsed
[docs]def prep_yaml(files): r"""Prepare yaml to be parsed by Cerberus using schema including covering backwards compatible options. Args: files (str, list): Either the path to a single yaml file or a list of yaml files. Returns: dict: YAML ready to be parsed using schema. """ # Load each file if isinstance(files, str): files = [files] yamls = [load_yaml(f) for f in files] # Combine models & connections yml_all = {} for k in ['models', 'connections']: yml_all[k] = [] for yml in yamls: if k not in yml: yml[k] = [] if not isinstance(yml[k], list): yml[k] = [yml[k]] if k[:-1] in yml: if isinstance(yml[k[:-1]], list): yml[k] += yml.pop(k[:-1]) else: yml[k].append(yml.pop(k[:-1])) for x in yml[k]: if isinstance(x, dict): x.setdefault('working_dir', yml['working_dir']) yml_all[k] += yml[k] # Prep models and connections iodict = {'inputs': {}, 'outputs': {}, 'connections': []} for yml in yml_all['models']: prep_model(yml, iodict) bridge_io_drivers(yml_all, iodict) yml_all['connections'] += iodict['connections'] for yml in yml_all['connections']: prep_connection(yml, iodict) return yml_all
[docs]def prep_model(yml, iodict): r"""Prepare yaml model entry for parsing with schema. Args: yml (dict): YAML entry for model that will be modified. iodict (dict): Log of all input/outputs. """ class2language = get_schema().class2language # Standardize format of input/output for io in ['inputs', 'outputs']: if io not in yml: yml[io] = [] if not isinstance(yml[io], list): yml[io] = [yml[io]] if io[:-1] in yml: if isinstance(yml[io[:-1]], list): yml[io] += yml.pop(io[:-1]) else: yml[io].append(yml.pop(io[:-1])) for i in range(len(yml[io])): if isinstance(yml[io][i], str): yml[io][i] = dict(name=yml[io][i]) if 'driver' in yml[io][i]: yml[io][i].setdefault('working_dir', yml['working_dir']) if 'name' in yml[io][i]: iodict[io][yml[io][i]['name']] = yml[io][i] # Replace driver with language if ('language' not in yml) and ('driver' in yml): yml['language'] = class2language[yml.pop('driver')][0] # String to list if ('client_of' in yml) and (not isinstance(yml['client_of'], list)): yml['client_of'] = [yml['client_of']]
[docs]def prep_connection(yml, iodict): r"""Prepare yaml connection entry for parsing with schema. Args: yml (dict): YAML entry for connection that will be modified. iodict (dict): Log of all input/outputs. """ # Plural if ('inputs' in yml): yml['input'] = yml.pop('inputs') if ('outputs' in yml): yml['output'] = yml.pop('outputs') # Input if ('input' in yml): if isinstance(yml['input'], list) and (len(yml['input']) == 1): yml['input'] = yml['input'][0] if ((isinstance(yml['input'], str) and (yml['input'] not in iodict['outputs']) and ('input_file' not in yml))): yml['input_file'] = yml.pop('input') elif isinstance(yml['input'], list): for x in yml['input']: if x not in iodict['outputs']: raise RuntimeError('%s not connected and cannot be a file.' % x) if isinstance(yml.get('input_file', None), str): yml['input_file'] = dict(name=yml['input_file']) if ('read_meth' in yml): if 'input_file' not in yml: raise ValueError("'read_meth' set, but input is not a file.") yml['input_file'].update(**rwmeth2filetype(yml.pop('read_meth'))) # Output if ('output' in yml): if isinstance(yml['output'], list) and (len(yml['output']) == 1): yml['output'] = yml['output'][0] if ((isinstance(yml['output'], str) and (yml['output'] not in iodict['inputs']) and ('output_file' not in yml))): yml['output_file'] = yml.pop('output') elif isinstance(yml['output'], list): for x in yml['output']: if x not in iodict['inputs']: raise RuntimeError('%s not connected and cannot be a file.' % x) if isinstance(yml.get('output_file', None), str): yml['output_file'] = dict(name=yml['output_file']) if ('write_meth' in yml): if 'output_file' not in yml: raise ValueError("'write_meth' set, but output is not a file.") yml['output_file'].update(**rwmeth2filetype(yml.pop('write_meth'))) # Error if neither input nor output is connected to a model if ('input_file' in yml) and ('output_file' in yml): raise RuntimeError(("Input '%s' and Output '%s' are both being " + "considered as files since neither is connected " + "to a model.") % (yml['input_file']['name'], yml['output_file']['name'])) # Move remaining keys to input/output comm/file working_dir = yml.pop('working_dir', None) s = get_schema() conn_keys = list(set(s['connection'].keys())) def migrate_keys(from_dict, to_dict): klist = list(from_dict.keys()) if not isinstance(to_dict, list): to_dict = [to_dict] for k in klist: if k not in conn_keys: v = from_dict.pop(k) for d in to_dict: d.setdefault(k, v) if 'input_file' in yml: if working_dir: yml['input_file'].setdefault('working_dir', working_dir) migrate_keys(yml, yml['input_file']) elif 'output_file' in yml: if working_dir: yml['output_file'].setdefault('working_dir', working_dir) migrate_keys(yml, yml['output_file']) elif 'input' in yml: if isinstance(yml['input'], list): io_names = yml['input'] else: io_names = [yml['input']] allio = [iodict['outputs'][x] for x in io_names] migrate_keys(yml, allio)
# Error should be raised if there is no input
[docs]def bridge_io_drivers(yml, iodict): r"""Create connection from input/output driver. Args: dict: YAML entry for model output that should be modified. iodict (dict): Log of all input/outputs. """ s = get_schema() conn_keys_gen = ['input', 'input_file', 'output', 'output_file'] file_keys = list(set(s['file'].keys()) - set(s['comm'].keys())) conn_keys = list(set(s['connection'].keys()) - set(conn_keys_gen)) idrivers = [] odrivers = [] pairs = [] new_connections = [] for x in iodict['inputs'].values(): if ('driver' in x) and ('args' in x): idrivers.append((x['args'], x['name'])) for x in iodict['outputs'].values(): if ('driver' in x) and ('args' in x): iidx = None for i, (iargs, iname) in enumerate(idrivers): if x['args'] == iargs: iidx = i break if iidx is not None: pairs.append((x['name'], idrivers.pop(iidx)[1])) else: odrivers.append((x['args'], x['name'])) # Create direct connections from output to input for (oname, iname) in pairs: oyml = iodict['outputs'][oname] iyml = iodict['inputs'][iname] conn = dict(input=oname, output=iname) new_connections.append((oyml, iyml, conn)) oyml.pop('working_dir', None) iyml.pop('working_dir', None) # File input for k, v in idrivers: oyml = None iyml = iodict['inputs'][v] fyml = dict(name=k, filetype=cdriver2filetype(iyml['driver'])) conn = dict(input_file=fyml, output=v) new_connections.append((oyml, iyml, conn)) # File output for k, v in odrivers: oyml = iodict['outputs'][v] iyml = None fyml = dict(name=k, filetype=cdriver2filetype(oyml['driver'])) conn = dict(output_file=fyml, input=v) new_connections.append((oyml, iyml, conn)) # Transfer keyword arguments for oyml, iyml, conn in new_connections: if oyml is None: fkey = 'input_file' fyml = iyml ymls = [iyml] elif iyml is None: fkey = 'output_file' fyml = oyml ymls = [oyml] else: fkey = None fyml = None ymls = [oyml, iyml] # File keywords if fyml is not None: for k in file_keys: if k in fyml: conn[fkey][k] = fyml.pop(k) # Connection keywords for y in ymls: for k in conn_keys: if k not in y: continue if k == 'translator': conn.setdefault(k, []) conn[k].append(y.pop(k)) else: conn.setdefault(k, y.pop(k)) del y['driver'], y['args'] # Add connection iodict['connections'].append(conn)
[docs]def rwmeth2filetype(rw_meth): out = {} if rw_meth == 'all': out['filetype'] = 'binary' elif rw_meth == 'line': out['filetype'] = 'ascii' elif rw_meth == 'table_array': out['filetype'] = 'table' out['as_array'] = True else: out['filetype'] = rw_meth return out
[docs]def cdriver2filetype(driver): r"""Convert a connection driver to a file type. Args: driver (str): The name of the connection driver. Returns: str: The corresponding file type for the driver. """ schema = get_schema() conntypes = schema.class2conntype filetypes = schema.class2filetype if driver not in conntypes: raise ValueError("%s is not a registered connection driver." % driver) icomm, ocomm, direction = conntypes[driver][0] if direction == 'input': ftype = filetypes[icomm][0] else: ftype = filetypes[ocomm][0] return ftype
[docs]def parse_yaml(files): r"""Parse list of yaml files. Args: files (str, list): Either the path to a single yaml file or a list of yaml files. Raises: ValueError: If the yml dictionary is missing a required keyword or has an invalid value. RuntimeError: If one of the I/O channels is not initialized with driver information. Returns: dict: Dictionary of information parsed from the yamls. """ # Parse files using schema yml_prep = prep_yaml(files) v = get_schema().validator yml_norm = v.normalized(yml_prep) if not v.validate(yml_norm): pprint.pprint(yml_norm) pprint.pprint(v.errors) raise ValueError("Invalid yaml.") yml_all = v.document # Parse models, then connections to ensure connections can be processed existing = None for k in ['models', 'connections']: for yml in yml_all[k]: existing = parse_component(yml, k[:-1], existing=existing) # Make sure that I/O channels initialized for io in ['input', 'output']: for k, v in existing[io].items(): if 'driver' not in v: raise RuntimeError("No driver established for %s channel %s" % ( io, k)) # Link io drivers back to models existing = link_model_io(existing) return existing
[docs]def parse_component(yml, ctype, existing=None): r"""Parse a yaml entry for a component, adding it to the list of existing components. Args: yml (dict): YAML dictionary for a component. ctype (str): Component type. This can be 'input', 'output', 'model', or 'connection'. existing (dict, optional): Dictionary of existing components. Defaults to empty dict. Raises: TypeError: If yml is not a dictionary. ValueError: If dtype is not 'input', 'output', 'model', or 'connection'. ValueError: If the component already exists. Returns: dict: All components identified. """ if not isinstance(yml, dict): raise TypeError("Component entry in yml must be a dictionary.") ctype_list = ['input', 'output', 'model', 'connection', 'model_input', 'model_output'] if existing is None: existing = {k: {} for k in ctype_list} if ctype not in ctype_list: raise ValueError("'%s' is not a recognized component.") # Parse based on type if ctype == 'model': existing = parse_model(yml, existing) elif ctype == 'connection': existing = parse_connection(yml, existing) # Ensure component dosn't already exist if yml['name'] in existing[ctype]: raise ValueError("%s is already a registered '%s' component." % ( yml['name'], ctype)) existing[ctype][yml['name']] = yml return existing
[docs]def parse_model(yml, existing): r"""Parse a yaml entry for a model. Args: yml (dict): YAML dictionary for a model. existing (dict): Dictionary of existing components. Returns: dict: Updated log of all entries. """ _lang2driver = get_schema().language2class yml['driver'] = _lang2driver[yml.pop('language')] # Add server driver if yml.get('is_server', False): srv = {'name': yml['name'], 'driver': 'ServerDriver', 'args': yml['name'] + '_SERVER', 'working_dir': yml['working_dir']} yml['inputs'].append(srv) yml['clients'] = [] # Add client driver if yml.get('client_of', []): srv_names = yml['client_of'] # prep_model converts to list # if isinstance(srv_names, str): # srv_names = [srv_names] yml['client_of'] = srv_names for srv in srv_names: cli = {'name': '%s_%s' % (srv, yml['name']), 'driver': 'ClientDriver', 'args': srv + '_SERVER', 'working_dir': yml['working_dir']} yml['outputs'].append(cli) # Model index and I/O channels yml['model_index'] = len(existing['model']) for io in ['inputs', 'outputs']: for x in yml[io]: x['model_driver'] = [yml['name']] existing = parse_component(x, io[:-1], existing=existing) return existing
[docs]def parse_connection(yml, existing): r"""Parse a yaml entry for a connection between I/O channels. Args: yml (dict): YAML dictionary for a connection. existing (dict): Dictionary of existing components. Raises: RuntimeError: If the 'input' entry is not a model output or file. RuntimeError: If neither the 'input' or 'output' entries correspond to model I/O channels. Returns: dict: Updated log of all entries. """ schema = get_schema() comm2conn = schema.conntype2class ftyp2comm = schema.filetype2class conn_keys_gen = ['input', 'input_file', 'output', 'output_file'] conn_keys = list(set(schema['connection'].keys()) - set(conn_keys_gen)) yml_conn = {} if not isinstance(yml.get('input', []), list): yml['input'] = [yml['input']] if not isinstance(yml.get('output', []), list): yml['output'] = [yml['output']] # File input if 'input_file' in yml: fname = os.path.expanduser(yml['input_file']['name']) if not os.path.isabs(fname): fname = os.path.join(yml['input_file']['working_dir'], fname) fname = os.path.normpath(fname) if not os.path.isfile(fname): raise RuntimeError(("Input '%s' not found in any of the registered " + "model outputs and is not a file.") % fname) args = fname name = ','.join(yml['output']) ocomm_pair = None icomm_pair = (ftyp2comm[yml['input_file']['filetype']], 'DefaultComm') yml_conn.update(**yml['input_file']) # File output elif 'output_file' in yml: fname = os.path.expanduser(yml['output_file']['name']) if not yml['output_file'].get('in_temp', False): if not os.path.isabs(fname): fname = os.path.join(yml['output_file']['working_dir'], fname) fname = os.path.normpath(fname) args = fname name = ','.join(yml['input']) ocomm_pair = ('DefaultComm', ftyp2comm[yml['output_file']['filetype']]) icomm_pair = None yml_conn.update(**yml['output_file']) # Generic Input/Output else: iname = ','.join(yml['input']) oname = ','.join(yml['output']) args = '%s_to_%s' % (iname, oname) name = args # TODO: Use RMQ drivers when models are on different machines # ocomm_pair = ('DefaultComm', 'rmq') # icomm_pair = ('rmq', 'DefaultComm') ocomm_pair = ('DefaultComm', 'DefaultComm') icomm_pair = ('DefaultComm', 'DefaultComm') # Output driver xo = None if ocomm_pair is not None: iyml = yml['input'] iname = ','.join(iyml) if len(iyml) == 1: xo = existing['output'][iyml[0]] else: xo = {'name': iname, 'comm': [], 'model_driver': []} for y in iyml: xo['comm'].append(existing['output'][y]) xo['model_driver'] += existing['output'][y]['model_driver'] del existing['output'][y] existing = parse_component(xo, 'output', existing) xo['args'] = args xo['driver'] = comm2conn[(ocomm_pair[0], ocomm_pair[1], 'output')] # Input driver xi = None if icomm_pair is not None: oyml = yml['output'] oname = ','.join(oyml) if len(oyml) == 1: xi = existing['input'][oyml[0]] else: xi = {'name': oname, 'comm': [], 'model_driver': []} for y in oyml: xi['comm'].append(existing['input'][y]) xi['model_driver'] += existing['input'][y]['model_driver'] del existing['input'][y] existing = parse_component(xi, 'input', existing) xi['args'] = args xi['driver'] = comm2conn[(icomm_pair[0], icomm_pair[1], 'input')] # Transfer connection keywords to one connection driver yml_conn.pop('name', None) for k in conn_keys: if k in yml: yml_conn[k] = yml[k] if xi is None: xo.update(**yml_conn) else: xi.update(**yml_conn) yml['name'] = name return existing