Module vflow.convert

Useful functions for converting between different types (dicts, lists, tuples, etc.)

Expand source code
'''Useful functions for converting between different types (dicts, lists, tuples, etc.)
'''
from copy import deepcopy
from uuid import uuid4

from vflow.vset import PREV_KEY
from vflow.subkey import Subkey
import pandas as pd
from pandas import DataFrame


def init_args(args_tuple: tuple, names=None):
    '''Wraps each argument of a tuple in its own single-entry dict

    Params
    ------
    args_tuple: tuple
        the arguments to wrap
    names: optional, list-like
        one name per argument; when omitted every argument is named 'start'

    Returns
    -------
    list with one dict per argument; each dict maps the 1-tuple key
    (Subkey(name, 'init'), ) to the argument and PREV_KEY to ('init', )
    '''
    n_args = len(args_tuple)
    if names is not None:
        assert len(names) == n_args, 'names should be same length as args_tuple'
    else:
        names = ['start'] * n_args

    return [
        {
            (Subkey(names[pos], 'init'), ): arg,
            PREV_KEY: ('init', ),
        }
        for pos, arg in enumerate(args_tuple)
    ]


def s(x):
    '''Gets the length of a list/tuple or the shape of an array-like

    Params
    ------
    x: a list or tuple (or a subclass of either), or any object exposing
        a ``shape`` attribute

    Returns
    -------
    len(x) for lists and tuples, x.shape otherwise
    '''
    # isinstance rather than an exact type match, so that list/tuple
    # subclasses (e.g. namedtuples) are measured with len() instead of
    # failing on a missing ``shape`` attribute
    if isinstance(x, (list, tuple)):
        return len(x)
    return x.shape

def init_step(idx, cols):
    '''Builds the label of an 'init' column from the next non-'init' column

    Params
    ------
    idx: position in cols at which the search starts
    cols: sequence of column names

    Returns
    -------
    'init-' followed by the first entry of cols at or after idx that is not
    'init'; None when there is no such entry
    '''
    following = (cols[pos] for pos in range(idx, len(cols)))
    return next(('init-' + name for name in following if name != 'init'), None)

def dict_to_df(d: dict):
    '''Converts a dictionary with tuple keys
    into a pandas DataFrame

    Params
    ------
    d: dict whose keys (other than PREV_KEY) are tuples of subkeys;
        a PREV_KEY entry, if present, is left out

    Returns
    -------
    DataFrame built from the remaining entries via ``Series.reset_index``.
    For a non-empty dict the columns are named after the ``origin`` of each
    subkey of the first key, followed by 'out'; columns whose origin is
    'init' are renamed by init_step. For an empty dict the frame is
    returned without renaming.
    '''
    d_copy = {k: v for k, v in d.items() if k != PREV_KEY}
    df = pd.Series(d_copy).reset_index()
    if d_copy:
        first_key = next(iter(d_copy))
        cols = [sk.origin for sk in first_key] + ['out']
        # set each init col to init-{next_module_set}
        cols = [c if c != 'init' else init_step(idx, cols) for idx, c in enumerate(cols)]
        # assign the labels directly: set_axis(..., inplace=True) was
        # deprecated in pandas 1.5 and removed in pandas 2.0
        df.columns = cols
    return df

def compute_interval(df: DataFrame, d_label, wrt_label, accum: list=['std']):
    '''Aggregates the d_label column within groups of the wrt_label column

    With the default accum this gives the std. dev of d_label with respect
    to perturbations in the wrt_label column.

    Params
    ------
    df: DataFrame containing both columns
    d_label: name of the column to aggregate
    wrt_label: name of the column to group by; it is cast to str first
    accum: aggregation(s) passed on to ``agg``

    Returns
    -------
    the aggregated frame, indexed by the stringified wrt_label values
    '''
    stringified = df.astype({wrt_label: str})
    selected = stringified[[wrt_label, d_label]]
    grouped = selected.groupby(wrt_label)
    return grouped.agg(accum)

def to_tuple(lists: list):
    '''Unpacks a list of lists into a tuple holding one list per position
    Ex. [[x1, y1], [x2, y2], [x3, y3]] -> ([x1, x2, x3], [y1, y2, y3])
    Ex. [m1, m2, m3] -> [m1, m2, m3]  (first element is not a list: unchanged)
    Ex. [[x1, y1]] -> [[x1, y1]]  (fewer than two elements: unchanged)
    Allows us to write X, y = ([x1, x2, x3], [y1, y2, y3])
    '''
    if len(lists) <= 1 or type(lists[0]) is not list:
        return lists
    # the first inner list fixes how many positions are unpacked
    width = len(lists[0])
    return tuple([row[j] for row in lists] for j in range(width))


def to_list(tup: tuple):
    '''Packs a tuple of arguments into a list
    Ex. ([x1, x2, x3], [y1, y2, y3]) -> [[x1, y1], [x2, y2], [x3, y3]]
    Ex. ([x1], [y1]) -> [[x1, y1]]
    Ex. ([x1, x2, x3], ) -> [[x1], [x2], [x3]]
    Ex. (x1, ) -> [x1]
    Ex. (x1, y1) -> [[x1, y1]]
    Ex. (x1, x2, x3, y1, y2, y3) -> [[x1, y1], [x2, y2], [x3, y3]]
    Ex. (x1, x2, x3, y1, y2) -> ValueError
    Allows us to call function with arguments in a loop
    '''
    n_tup = len(tup)
    if n_tup == 0:
        return []

    if isinstance(tup[0], list):
        if n_tup == 1:
            return [[x] for x in tup[0]]
        # pack the i-th element of every list together; the first list
        # determines how many packed entries there are
        return [[column[i] for column in tup] for i in range(len(tup[0]))]

    # the first element is data
    if n_tup == 1:
        return list(tup)
    if n_tup % 2:
        raise ValueError('Don\'t know how to handle uneven number of args '
                         'without a list. Please wrap your args in a list.')
    # assume first half of args is input and second half is outcome
    half = n_tup // 2
    return [[inp, out] for inp, out in zip(tup[:half], tup[half:])]


def sep_dicts(d: dict, n_out: int = 1):
    '''Splits a dictionary whose values are iterables into n_out dictionaries
    Assumes every value has same length n_out

    Params
    ------
    d: {k1: (x1, y1), k2: (x2, y2), ..., PREV_KEY: p}
    n_out: the number of dictionaries to separate d into

    Returns
    -------
    d itself when n_out == 1, otherwise a list of n_out dicts:
    [{k1: x1, k2: x2, ..., PREV_KEY: p}, {k1: y1, k2: y2, ..., PREV_KEY: p}]
    The key used in the i-th dict is (key[i], ) + key[n_out:], and the last
    subkey of every such key gets its ``_sep_dicts_id`` attribute set to one
    id shared by the whole call.
    '''
    # a single output: nothing to separate
    if n_out == 1:
        return d

    # one id for the whole call; w/ high prob, uuid4 is unique
    shared_id = str(uuid4())
    separated = [{} for _ in range(n_out)]
    for key, value in d.items():
        if key == PREV_KEY:
            continue
        for pos, target in enumerate(separated):
            # assumes the correct sub-key for item pos is in the pos-th position
            new_key = (key[pos],) + key[n_out:]
            new_key[-1]._sep_dicts_id = shared_id
            target[new_key] = value[pos]

    # add back prev
    prev = d[PREV_KEY]
    for target in separated:
        target[PREV_KEY] = prev
    return separated


def combine_keys(left_key, right_key):
    '''Combines two keys (tuples of subkeys) into a single key

    The shorter key (right_key on ties) supplies the subkeys used for
    matching: each of its subkeys for which ``is_matching()`` is true is
    compared, in order, against the subkeys of the other key.

    Returns
    -------
    () as soon as one of those subkeys ``mismatches`` a subkey of the other
    key before having matched one; otherwise left_key followed by right_key
    with the matched subkeys left out. When nothing matched, that is
    left_key + right_key.
    '''
    if len(left_key) < len(right_key):
        match_key, compare_key = left_key, right_key
    else:
        match_key, compare_key = right_key, left_key

    candidates = [sk for sk in match_key if sk.is_matching()]
    found = []
    for candidate in candidates:
        for other in compare_key:
            if candidate.matches(other):
                found.append(candidate)
                break
            if candidate.mismatches(other):
                # subkeys with same origin but different values are rejected
                return ()

    if not found:
        return left_key + right_key
    # always filter on right key
    remaining = tuple(sk for sk in right_key if sk not in found)
    return left_key + remaining


def combine_dicts(*args: dict, base_case=True):
    '''Combines any number of dictionaries into a single dictionary. Dictionaries
    are combined left to right, matching on the subkeys of the arg that has
    fewer matching requirements.

    Params
    ------
    *args: the dicts to combine; keys other than PREV_KEY are tuples of subkeys
    base_case: True when the values of args[0] are raw values; False when
        args[0] is itself the result of an earlier combination, so that its
        values are already tuples and get extended rather than nested.
        Callers normally leave this at its default.

    Returns
    -------
    dict mapping every non-empty combined key to a tuple holding one value
    per input dict. With a single input its PREV_KEY entry is copied as is;
    with two or more inputs PREV_KEY maps to the concatenation of the
    inputs' PREV_KEY tuples. No inputs gives an empty dict.
    '''
    n_args = len(args)
    combined_dict = {}
    if n_args == 0:
        return combined_dict
    elif n_args == 1:
        for k in args[0]:
            # wrap the dict values in tuples; this is helpful so that when we
            # pass the values to a module fun in we can just use * expansion
            if k != PREV_KEY:
                combined_dict[k] = (args[0][k],)
            else:
                combined_dict[k] = args[0][k]
        return combined_dict
    elif n_args == 2:
        for k0 in args[0]:
            for k1 in args[1]:

                if k0 == PREV_KEY or k1 == PREV_KEY:
                    continue

                combined_key = combine_keys(k0, k1)

                if len(combined_key) > 0:
                    if base_case:
                        combined_dict[combined_key] = (args[0][k0], args[1][k1])
                    else:
                        # args[0] already holds tuples: extend, don't nest
                        combined_dict[combined_key] = args[0][k0] + (args[1][k1],)

        prev_tup = ()
        for arg in args:
            if PREV_KEY in arg:
                prev_tup += arg[PREV_KEY]
        combined_dict[PREV_KEY] = prev_tup
        return combined_dict
    else:
        # combine the first two dicts and call recursively with remaining args.
        # base_case is forwarded to the inner call: when this call already
        # received a combined dict as args[0], its tuple values must be
        # extended, not wrapped in another tuple (without this, combining
        # four or more dicts produced nested values).
        first_two = combine_dicts(args[0], args[1], base_case=base_case)
        return combine_dicts(first_two, *args[2:], base_case=False)


def apply_modules(modules: dict, data_dict: dict):
    '''Calls every module on every data entry whose key combines with its key

    Params
    ------
    modules: dict mapping keys to callables
    data_dict: dict mapping keys to iterables of positional arguments

    Returns
    -------
    dict mapping combine_keys(data_key, module_key) to the result of calling
    a deep copy of the module with the unpacked data. PREV_KEY entries of
    either dict are skipped, as are pairs whose combined key is empty.
    '''
    results = {}
    pairs = ((mod_k, data_k) for mod_k in modules for data_k in data_dict)
    for mod_k, data_k in pairs:
        if mod_k == PREV_KEY or data_k == PREV_KEY:
            continue

        merged_key = combine_keys(data_k, mod_k)
        if len(merged_key) == 0:
            continue

        # every application works on its own copy of the module
        module_copy = deepcopy(modules[mod_k])
        results[merged_key] = module_copy(*data_dict[data_k])

    return results

Functions

def apply_modules(modules: dict, data_dict: dict)
Expand source code
def apply_modules(modules: dict, data_dict: dict):
    '''Calls every module on every data entry whose key combines with its key

    Params
    ------
    modules: dict mapping keys to callables
    data_dict: dict mapping keys to iterables of positional arguments

    Returns
    -------
    dict mapping combine_keys(data_key, module_key) to the result of calling
    a deep copy of the module with the unpacked data. PREV_KEY entries of
    either dict are skipped, as are pairs whose combined key is empty.
    '''
    results = {}
    pairs = ((mod_k, data_k) for mod_k in modules for data_k in data_dict)
    for mod_k, data_k in pairs:
        if mod_k == PREV_KEY or data_k == PREV_KEY:
            continue

        merged_key = combine_keys(data_k, mod_k)
        if len(merged_key) == 0:
            continue

        # every application works on its own copy of the module
        module_copy = deepcopy(modules[mod_k])
        results[merged_key] = module_copy(*data_dict[data_k])

    return results
def combine_dicts(*args: dict, base_case=True)

Combines any number of dictionaries into a single dictionary. Dictionaries are combined left to right, matching on the subkeys of the arg that has fewer matching requirements.

Expand source code
def combine_dicts(*args: dict, base_case=True):
    '''Combines any number of dictionaries into a single dictionary. Dictionaries
    are combined left to right, matching on the subkeys of the arg that has
    fewer matching requirements.

    Params
    ------
    *args: the dicts to combine; keys other than PREV_KEY are tuples of subkeys
    base_case: True when the values of args[0] are raw values; False when
        args[0] is itself the result of an earlier combination, so that its
        values are already tuples and get extended rather than nested.
        Callers normally leave this at its default.

    Returns
    -------
    dict mapping every non-empty combined key to a tuple holding one value
    per input dict. With a single input its PREV_KEY entry is copied as is;
    with two or more inputs PREV_KEY maps to the concatenation of the
    inputs' PREV_KEY tuples. No inputs gives an empty dict.
    '''
    n_args = len(args)
    combined_dict = {}
    if n_args == 0:
        return combined_dict
    elif n_args == 1:
        for k in args[0]:
            # wrap the dict values in tuples; this is helpful so that when we
            # pass the values to a module fun in we can just use * expansion
            if k != PREV_KEY:
                combined_dict[k] = (args[0][k],)
            else:
                combined_dict[k] = args[0][k]
        return combined_dict
    elif n_args == 2:
        for k0 in args[0]:
            for k1 in args[1]:

                if k0 == PREV_KEY or k1 == PREV_KEY:
                    continue

                combined_key = combine_keys(k0, k1)

                if len(combined_key) > 0:
                    if base_case:
                        combined_dict[combined_key] = (args[0][k0], args[1][k1])
                    else:
                        # args[0] already holds tuples: extend, don't nest
                        combined_dict[combined_key] = args[0][k0] + (args[1][k1],)

        prev_tup = ()
        for arg in args:
            if PREV_KEY in arg:
                prev_tup += arg[PREV_KEY]
        combined_dict[PREV_KEY] = prev_tup
        return combined_dict
    else:
        # combine the first two dicts and call recursively with remaining args.
        # base_case is forwarded to the inner call: when this call already
        # received a combined dict as args[0], its tuple values must be
        # extended, not wrapped in another tuple (without this, combining
        # four or more dicts produced nested values).
        first_two = combine_dicts(args[0], args[1], base_case=base_case)
        return combine_dicts(first_two, *args[2:], base_case=False)
def combine_keys(left_key, right_key)
Expand source code
def combine_keys(left_key, right_key):
    '''Combines two keys (tuples of subkeys) into a single key

    The shorter key (right_key on ties) supplies the subkeys used for
    matching: each of its subkeys for which ``is_matching()`` is true is
    compared, in order, against the subkeys of the other key.

    Returns
    -------
    () as soon as one of those subkeys ``mismatches`` a subkey of the other
    key before having matched one; otherwise left_key followed by right_key
    with the matched subkeys left out. When nothing matched, that is
    left_key + right_key.
    '''
    if len(left_key) < len(right_key):
        match_key, compare_key = left_key, right_key
    else:
        match_key, compare_key = right_key, left_key

    candidates = [sk for sk in match_key if sk.is_matching()]
    found = []
    for candidate in candidates:
        for other in compare_key:
            if candidate.matches(other):
                found.append(candidate)
                break
            if candidate.mismatches(other):
                # subkeys with same origin but different values are rejected
                return ()

    if not found:
        return left_key + right_key
    # always filter on right key
    remaining = tuple(sk for sk in right_key if sk not in found)
    return left_key + remaining
def compute_interval(df: pandas.core.frame.DataFrame, d_label, wrt_label, accum: list = ['std'])

Compute an interval (std. dev) of d_label column with respect to perturbations in the wrt_label column

Expand source code
def compute_interval(df: DataFrame, d_label, wrt_label, accum: list=['std']):
    '''Aggregates the d_label column within groups of the wrt_label column

    With the default accum this gives the std. dev of d_label with respect
    to perturbations in the wrt_label column.

    Params
    ------
    df: DataFrame containing both columns
    d_label: name of the column to aggregate
    wrt_label: name of the column to group by; it is cast to str first
    accum: aggregation(s) passed on to ``agg``

    Returns
    -------
    the aggregated frame, indexed by the stringified wrt_label values
    '''
    stringified = df.astype({wrt_label: str})
    selected = stringified[[wrt_label, d_label]]
    grouped = selected.groupby(wrt_label)
    return grouped.agg(accum)
def dict_to_df(d: dict)

Converts a dictionary with tuple keys into a pandas DataFrame

Expand source code
def dict_to_df(d: dict):
    '''Converts a dictionary with tuple keys
    into a pandas DataFrame

    Params
    ------
    d: dict whose keys (other than PREV_KEY) are tuples of subkeys;
        a PREV_KEY entry, if present, is left out

    Returns
    -------
    DataFrame built from the remaining entries via ``Series.reset_index``.
    For a non-empty dict the columns are named after the ``origin`` of each
    subkey of the first key, followed by 'out'; columns whose origin is
    'init' are renamed by init_step. For an empty dict the frame is
    returned without renaming.
    '''
    d_copy = {k: v for k, v in d.items() if k != PREV_KEY}
    df = pd.Series(d_copy).reset_index()
    if d_copy:
        first_key = next(iter(d_copy))
        cols = [sk.origin for sk in first_key] + ['out']
        # set each init col to init-{next_module_set}
        cols = [c if c != 'init' else init_step(idx, cols) for idx, c in enumerate(cols)]
        # assign the labels directly: set_axis(..., inplace=True) was
        # deprecated in pandas 1.5 and removed in pandas 2.0
        df.columns = cols
    return df
def init_args(args_tuple: tuple, names=None)

Converts a tuple of arguments to a list of dicts.

Params

names: optional, list-like. Gives names for each of the arguments in the tuple.

Expand source code
def init_args(args_tuple: tuple, names=None):
    '''Wraps each argument of a tuple in its own single-entry dict

    Params
    ------
    args_tuple: tuple
        the arguments to wrap
    names: optional, list-like
        one name per argument; when omitted every argument is named 'start'

    Returns
    -------
    list with one dict per argument; each dict maps the 1-tuple key
    (Subkey(name, 'init'), ) to the argument and PREV_KEY to ('init', )
    '''
    n_args = len(args_tuple)
    if names is not None:
        assert len(names) == n_args, 'names should be same length as args_tuple'
    else:
        names = ['start'] * n_args

    return [
        {
            (Subkey(names[pos], 'init'), ): arg,
            PREV_KEY: ('init', ),
        }
        for pos, arg in enumerate(args_tuple)
    ]
def init_step(idx, cols)
Expand source code
def init_step(idx, cols):
    '''Builds the label of an 'init' column from the next non-'init' column

    Params
    ------
    idx: position in cols at which the search starts
    cols: sequence of column names

    Returns
    -------
    'init-' followed by the first entry of cols at or after idx that is not
    'init'; None when there is no such entry
    '''
    following = (cols[pos] for pos in range(idx, len(cols)))
    return next(('init-' + name for name in following if name != 'init'), None)
def s(x)

Gets shape of a list/tuple/ndarray

Expand source code
def s(x):
    '''Gets the length of a list/tuple or the shape of an array-like

    Params
    ------
    x: a list or tuple (or a subclass of either), or any object exposing
        a ``shape`` attribute

    Returns
    -------
    len(x) for lists and tuples, x.shape otherwise
    '''
    # isinstance rather than an exact type match, so that list/tuple
    # subclasses (e.g. namedtuples) are measured with len() instead of
    # failing on a missing ``shape`` attribute
    if isinstance(x, (list, tuple)):
        return len(x)
    return x.shape
def sep_dicts(d: dict, n_out: int = 1)

Converts a dictionary whose values are iterables into multiple dictionaries. Assumes every value has the same length n_out.

Params

d: {k1: (x1, y1), k2: (x2, y2), ..., '__prev__': p}

n_out: the number of dictionaries to separate d into

Returns

sep_dicts : [{k1: x1, k2: x2, ..., '__prev__': p}, {k1: y1, k2: y2, '__prev__': p}]
 
Expand source code
def sep_dicts(d: dict, n_out: int = 1):
    '''Splits a dictionary whose values are iterables into n_out dictionaries
    Assumes every value has same length n_out

    Params
    ------
    d: {k1: (x1, y1), k2: (x2, y2), ..., PREV_KEY: p}
    n_out: the number of dictionaries to separate d into

    Returns
    -------
    d itself when n_out == 1, otherwise a list of n_out dicts:
    [{k1: x1, k2: x2, ..., PREV_KEY: p}, {k1: y1, k2: y2, ..., PREV_KEY: p}]
    The key used in the i-th dict is (key[i], ) + key[n_out:], and the last
    subkey of every such key gets its ``_sep_dicts_id`` attribute set to one
    id shared by the whole call.
    '''
    # a single output: nothing to separate
    if n_out == 1:
        return d

    # one id for the whole call; w/ high prob, uuid4 is unique
    shared_id = str(uuid4())
    separated = [{} for _ in range(n_out)]
    for key, value in d.items():
        if key == PREV_KEY:
            continue
        for pos, target in enumerate(separated):
            # assumes the correct sub-key for item pos is in the pos-th position
            new_key = (key[pos],) + key[n_out:]
            new_key[-1]._sep_dicts_id = shared_id
            target[new_key] = value[pos]

    # add back prev
    prev = d[PREV_KEY]
    for target in separated:
        target[PREV_KEY] = prev
    return separated
def to_list(tup: tuple)

Convert from tuple to packed list.

Ex. ([x1, x2, x3], [y1, y2, y3]) -> [[x1, y1], [x2, y2], [x3, y3]]
Ex. ([x1], [y1]) -> [[x1, y1]]
Ex. ([x1, x2, x3], ) -> [[x1], [x2], [x3]]
Ex. (x1, ) -> [x1]
Ex. (x1, y1) -> [[x1, y1]]
Ex. (x1, x2, x3, y1, y2, y3) -> [[x1, y1], [x2, y2], [x3, y3]]
Ex. (x1, x2, x3, y1, y2) -> Error

Allows us to call function with arguments in a loop.

Expand source code
def to_list(tup: tuple):
    '''Packs a tuple of arguments into a list
    Ex. ([x1, x2, x3], [y1, y2, y3]) -> [[x1, y1], [x2, y2], [x3, y3]]
    Ex. ([x1], [y1]) -> [[x1, y1]]
    Ex. ([x1, x2, x3], ) -> [[x1], [x2], [x3]]
    Ex. (x1, ) -> [x1]
    Ex. (x1, y1) -> [[x1, y1]]
    Ex. (x1, x2, x3, y1, y2, y3) -> [[x1, y1], [x2, y2], [x3, y3]]
    Ex. (x1, x2, x3, y1, y2) -> ValueError
    Allows us to call function with arguments in a loop
    '''
    n_tup = len(tup)
    if n_tup == 0:
        return []

    if isinstance(tup[0], list):
        if n_tup == 1:
            return [[x] for x in tup[0]]
        # pack the i-th element of every list together; the first list
        # determines how many packed entries there are
        return [[column[i] for column in tup] for i in range(len(tup[0]))]

    # the first element is data
    if n_tup == 1:
        return list(tup)
    if n_tup % 2:
        raise ValueError('Don\'t know how to handle uneven number of args '
                         'without a list. Please wrap your args in a list.')
    # assume first half of args is input and second half is outcome
    half = n_tup // 2
    return [[inp, out] for inp, out in zip(tup[:half], tup[half:])]
def to_tuple(lists: list)

Convert from lists to unpacked tuple.

Ex. [[x1, y1], [x2, y2], [x3, y3]] -> ([x1, x2, x3], [y1, y2, y3])
Ex. [[x1, y1]] -> [[x1, y1]] (a list with fewer than two elements is returned unchanged)
Ex. [m1, m2, m3] -> [m1, m2, m3]

Allows us to write X, y = ([x1, x2, x3], [y1, y2, y3]).

Expand source code
def to_tuple(lists: list):
    '''Unpacks a list of lists into a tuple holding one list per position
    Ex. [[x1, y1], [x2, y2], [x3, y3]] -> ([x1, x2, x3], [y1, y2, y3])
    Ex. [m1, m2, m3] -> [m1, m2, m3]  (first element is not a list: unchanged)
    Ex. [[x1, y1]] -> [[x1, y1]]  (fewer than two elements: unchanged)
    Allows us to write X, y = ([x1, x2, x3], [y1, y2, y3])
    '''
    if len(lists) <= 1 or type(lists[0]) is not list:
        return lists
    # the first inner list fixes how many positions are unpacked
    width = len(lists[0])
    return tuple([row[j] for row in lists] for j in range(width))