Source code for recipe.ingredients

from functools import total_ordering
from uuid import uuid4

from sqlalchemy import Float, and_, between, case, cast, func, or_

from recipe.compat import str
from recipe.exceptions import BadIngredient
from recipe.utils import AttrDict


@total_ordering
class Ingredient(object):
    """Ingredients combine to make a SQLAlchemy query.

    Any unknown keyword arguments provided to an Ingredient
    during initialization are stored in a meta object.

    .. code:: python

        # icon is an unknown keyword argument
        m = Metric(func.sum(MyTable.sales), icon='cog')
        print(m.meta.icon)
        >>> 'cog'

    This meta storage can be used to add new capabilities to
    ingredients.

    Args:

        id (:obj:`str`):
            An id to identify this Ingredient. If ingredients are
            added to a Shelf, the id is automatically set as the key in
            the shelf. When omitted, a random 12 character hex id is
            generated.
        columns (:obj:`list` of :obj:`ColumnElement`):
            A list of SQLAlchemy columns to use in a query select.
        filters (:obj:`list` of :obj:`BinaryExpression`):
            A list of SQLAlchemy BinaryExpressions to use in the
            .filter() clause of a query.
        havings (:obj:`list` of :obj:`BinaryExpression`):
            A list of SQLAlchemy BinaryExpressions to use in the
            .having() clause of a query.
        group_by (:obj:`list` of :obj:`ColumnElement`):
            A list of SQLAlchemy columns to use in the `group_by` clause
            of a query.
        formatters: (:obj:`list` of :obj:`callable`):
            A list of callables to apply to the result values.
            If formatters exist, property `{ingredient.id}_raw` will
            exist on each result row containing the unformatted
            value.
        column_suffixes (:obj:`list` of :obj:`str`):
            Optional suffixes appended to ``id`` to label each column.
            When supplied there must be exactly one per column.
        cache_context (:obj:`str`):
            Extra context when caching this ingredient. DEPRECATED
        ordering (`string`, 'asc' or 'desc'):
            One of 'asc' or 'desc'. 'asc' is the default value.
            The default ordering of this ingredient if it is
            used in a ``recipe.order_by``.
            This is added to the ingredient when the ingredient is
            used in a ``recipe.order_by``.
        quickfilters (:obj:`list` of named filters):
            A list of named filters that can be accessed through
            ``build_filter``. Named filters are dictionaries with
            a ``name`` (:obj:str) property and a ``condition`` property
            (:obj:`BinaryExpression`)

    Returns:
        An Ingredient object.

    Raises:
        BadIngredient: If ``formatters`` is not a list or tuple, or if
            ``column_suffixes`` is given with a length different from
            ``columns``.
    """

    def __init__(self, **kwargs):
        self.id = kwargs.pop('id', uuid4().hex[:12])
        self.columns = kwargs.pop('columns', [])
        self.filters = kwargs.pop('filters', [])
        self.havings = kwargs.pop('havings', [])
        self.group_by = kwargs.pop('group_by', [])
        self.formatters = kwargs.pop('formatters', [])
        self.quickfilters = kwargs.pop('quickfilters', [])
        self.column_suffixes = kwargs.pop('column_suffixes', None)
        self.cache_context = kwargs.pop('cache_context', '')
        self.anonymize = False

        # What order should this be in
        self.ordering = kwargs.pop('ordering', 'asc')

        if not isinstance(self.formatters, (list, tuple)):
            raise BadIngredient(
                'formatters passed to an ingredient must be a '
                'list or tuple'
            )
        # If explicit suffixes are passed in, there must be one for each
        # column
        if self.column_suffixes is not None and \
                len(self.column_suffixes) != len(self.columns):
            raise BadIngredient(
                'column_suffixes must be the same length as '
                'columns'
            )

        # Any remaining passed properties are available in self.meta
        self.meta = AttrDict(kwargs)

    # NOTE: hashing is based on describe() (class name, id and stringified
    # expressions) while __eq__ below compares only _order() (class rank
    # and id).
    def __hash__(self):
        return hash(self.describe())

    def __repr__(self):
        return self.describe()

    def _stringify(self):
        """Return a relevant string based on ingredient type for repr and
        ordering. Ingredients with the same classname, id and _stringify
        value are considered the same."""
        return ' '.join(str(col) for col in self.columns)

    def describe(self):
        """A string representation of the ingredient."""
        return u'({}){} {}'.format(
            self.__class__.__name__, self.id, self._stringify()
        )

    def _format_value(self, value):
        """Formats value by applying each stored formatter in order."""
        for f in self.formatters:
            value = f(value)
        return value

    def make_column_suffixes(self):
        """Make sure we have the right column suffixes. These will be
        appended to `id` when generating the query.

        Returns:
            The explicit ``column_suffixes`` when set; otherwise an empty
            tuple for no columns, or a one-element tuple (``'_raw'`` when
            formatters exist, ``''`` otherwise) for a single column.

        Raises:
            BadIngredient: If there is more than one column and no
                explicit suffixes were supplied.
        """
        if self.column_suffixes:
            return self.column_suffixes

        if len(self.columns) == 0:
            return ()

        elif len(self.columns) == 1:
            if self.formatters:
                return '_raw',
            else:
                return '',
        else:
            raise BadIngredient(
                'column_suffixes must be supplied if there is '
                'more than one column'
            )

    @property
    def query_columns(self):
        """Yield labeled columns to be used as a select in a query."""
        for column, suffix in zip(self.columns, self.make_column_suffixes()):
            yield column.label(self.id + suffix)

    @property
    def order_by_columns(self):
        """Yield columns to be used in an order by using this ingredient.

        Columns are wrapped in ``.desc()`` when ``ordering`` is 'desc'.
        """
        for c in self.columns:
            if self.ordering == 'desc':
                yield c.desc()
            else:
                yield c

    @property
    def cauldron_extras(self):
        """Yield extra tuples containing a field name and a callable that
        takes a row.

        Only yields when formatters exist: the callable reads the
        ``{id}_raw`` attribute of the row and runs it through the
        formatters.
        """
        if self.formatters:
            raw_property = self.id + '_raw'
            yield self.id, lambda row: \
                self._format_value(getattr(row, raw_property))

    def _order(self):
        """Ingredients are sorted by subclass then by id."""
        if isinstance(self, Dimension):
            return (0, self.id)
        elif isinstance(self, Metric):
            return (1, self.id)
        elif isinstance(self, Filter):
            return (2, self.id)
        elif isinstance(self, Having):
            return (3, self.id)
        else:
            return (4, self.id)

    def __lt__(self, other):
        """Make ingredients sortable."""
        return self._order() < other._order()

    def __eq__(self, other):
        """Make ingredients sortable."""
        return self._order() == other._order()

    def __ne__(self, other):
        """Make ingredients sortable."""
        return not (self._order() == other._order())

    def _build_scalar_filter(self, value, operator=None):
        """Build a Filter given a single value.

        Args:

            value (a string, number, boolean or None):
            operator (`str`)
                A valid scalar operator. The default operator
                is `eq`

        Returns:

            A Filter object

        Raises:
            ValueError: If the operator is unknown, or if the operator is
                ``quickfilter`` and no quickfilter is named ``value``.
        """
        filter_column = self.columns[0]

        if operator is None or operator == 'eq':
            # Default operator is 'eq' so if no operator is provided, handle
            # like an 'eq'
            if value is None:
                return Filter(filter_column.is_(value))
            else:
                return Filter(filter_column == value)
        if operator == 'ne':
            return Filter(filter_column != value)
        elif operator == 'lt':
            return Filter(filter_column < value)
        elif operator == 'lte':
            return Filter(filter_column <= value)
        elif operator == 'gt':
            return Filter(filter_column > value)
        elif operator == 'gte':
            return Filter(filter_column >= value)
        elif operator == 'is':
            return Filter(filter_column.is_(value))
        elif operator == 'isnot':
            return Filter(filter_column.isnot(value))
        elif operator == 'like':
            return Filter(filter_column.like(value))
        elif operator == 'ilike':
            return Filter(filter_column.ilike(value))
        elif operator == 'quickfilter':
            for qf in self.quickfilters:
                if qf.get('name') == value:
                    return Filter(qf.get('condition'))
            raise ValueError(
                'quickfilter {} was not found in '
                'ingredient {}'.format(value, self.id)
            )
        else:
            raise ValueError('Unknown operator {}'.format(operator))

    def _build_vector_filter(self, value, operator=None):
        """Build a Filter given a list of values.

        Args:

            value (a list or tuple of strings, numbers, booleans or None):
            operator (:obj:`str`)
                A valid vector operator. The default operator is
                `in`.

        Returns:

            A Filter object

        Raises:
            ValueError: If the operator is unknown, or if the operator is
                ``between`` and ``value`` does not hold exactly two items.
        """
        filter_column = self.columns[0]

        if operator is None or operator == 'in':
            # Default operator is 'in' so if no operator is provided, handle
            # like an 'in'
            if None in value:
                # filter out the Nones
                non_none_value = sorted([v for v in value if v is not None])
                if non_none_value:
                    return Filter(
                        or_(
                            filter_column.is_(None),
                            filter_column.in_(non_none_value)
                        )
                    )
                else:
                    return Filter(filter_column.is_(None))
            else:
                # Sort to generate deterministic query sql for caching
                value = sorted(value)
                return Filter(filter_column.in_(value))
        elif operator == 'notin':
            if None in value:
                # filter out the Nones
                non_none_value = sorted([v for v in value if v is not None])
                if non_none_value:
                    return Filter(
                        and_(
                            filter_column.isnot(None),
                            filter_column.notin_(non_none_value)
                        )
                    )
                else:
                    return Filter(filter_column.isnot(None))
            else:
                # Sort to generate deterministic query sql for caching
                value = sorted(value)
                return Filter(filter_column.notin_(value))
        elif operator == 'between':
            if len(value) != 2:
                # Raise the descriptive error explicitly; without the
                # ``raise`` the caller only sees an opaque unpacking error.
                raise ValueError(
                    'When using between, you can only supply a '
                    'lower and upper bounds.'
                )
            lower_bound, upper_bound = value
            return Filter(between(filter_column, lower_bound, upper_bound))
        else:
            raise ValueError('Unknown operator {}'.format(operator))

    def build_filter(self, value, operator=None):
        """
        Builds a filter based on a supplied value and optional operator. If
        no operator is supplied an ``in`` filter will be used for a list and a
        ``eq`` filter if we get a scalar value.

        ``build_filter`` is used by the AutomaticFilter extension.

        Args:

            value:
                A value or list of values to operate against
            operator (:obj:`str`)
                An operator that determines the type of comparison
                to do against value.

                The default operator is 'in' if value is a list and
                'eq' if value is a string, number, boolean or None.

        Returns:

            A Filter object
        """
        # Only lists and tuples are treated as vectors
        value_is_scalar = not isinstance(value, (list, tuple))

        if value_is_scalar:
            return self._build_scalar_filter(value, operator=operator)
        else:
            return self._build_vector_filter(value, operator=operator)

    @property
    def expression(self):
        """An accessor for the SQLAlchemy expression representing this
        Ingredient: the first column, or None when there are no columns."""
        if self.columns:
            return self.columns[0]
        else:
            return None
class Filter(Ingredient):
    """An ingredient that holds exactly one filter expression."""

    def __init__(self, expression, **kwargs):
        super(Filter, self).__init__(**kwargs)
        # The single expression replaces whatever ``filters`` the base
        # initializer stored.
        self.filters = [expression]

    def _stringify(self):
        """Render the stored filter expressions as a space separated
        string."""
        rendered = [str(expr) for expr in self.filters]
        return ' '.join(rendered)

    @property
    def expression(self):
        """The first stored filter expression, or None if there are
        none."""
        return self.filters[0] if self.filters else None
class Having(Ingredient):
    """An ingredient that holds exactly one having expression, limiting
    results based on an aggregate boolean clause."""

    def __init__(self, expression, **kwargs):
        super(Having, self).__init__(**kwargs)
        # The single expression replaces whatever ``havings`` the base
        # initializer stored.
        self.havings = [expression]

    def _stringify(self):
        """Render the stored having expressions as a space separated
        string."""
        rendered = [str(expr) for expr in self.havings]
        return ' '.join(rendered)

    @property
    def expression(self):
        """The first stored having expression, or None if there are
        none."""
        return self.havings[0] if self.havings else None
class Dimension(Ingredient):
    """A Dimension is an Ingredient that adds columns and groups by those
    columns. Columns should be non-aggregate SQLAlchemy expressions.

    The required expression supplies the dimension's value role. Additional
    expressions can be provided in keyword arguments with keys
    that look like "{role}_expression". The role is suffixed to the
    end of the SQL column name.

    For instance, the following

    .. code:: python

        Dimension(Hospitals.name,
                  latitude_expression=Hospitals.lat,
                  longitude_expression=Hospitals.lng,
                  id='hospital')

    would add columns named "hospital", "hospital_latitude", and
    "hospital_longitude" to the recipes results. All three of these
    expressions would be used as group bys.

    The following additional keyword parameters are also supported:

    Args:

        lookup (:obj:`dict`):
            A dictionary that is used to map values to new values.

            Note: Lookup adds a ``formatter`` callable as the first
            item in the list of formatters.
        lookup_default (:obj:`object`)
            A default to show if the value can't be found in the
            lookup dictionary.
        order_by_expression (:obj:`ColumnElement`):
            An optional expression to order by instead of the value
            expression.

    Returns:

        A Dimension object

    Raises:
        BadIngredient: If a ``raw_expression`` keyword is supplied (``raw``
            is a reserved role) or if ``lookup`` is not a dictionary.

    :param lookup: dict A dictionary to translate values into
    :param lookup_default: A default to show if the value can't be found in the
      lookup dictionary.
    """

    def __init__(self, expression, **kwargs):
        super(Dimension, self).__init__(**kwargs)

        # An optional expression to use instead of the value expression
        # when ordering
        order_by_expression = kwargs.pop('order_by_expression', None)

        # We must always have a value role
        self.roles = {'value': expression}
        role_suffix = '_expression'
        for k, v in kwargs.items():
            role = None
            if k.endswith(role_suffix):
                # Remove _expression to get the role
                role = k[:-len(role_suffix)]
            if role:
                if role == 'raw':
                    raise BadIngredient('raw is a reserved role in dimensions')
                self.roles[role] = v

        self.columns = []
        self.group_by = []
        self._order_by_columns = []
        self.role_keys = []
        if 'id' in self.roles:
            self.columns.append(self.roles['id'])
            self.group_by.append(self.roles['id'])
            self.role_keys.append('id')
            self._order_by_columns.append(self.roles['id'])
        if 'value' in self.roles:
            self.columns.append(self.roles['value'])
            self.group_by.append(self.roles['value'])
            self.role_keys.append('value')
            # Order by columns are in order of value, id
            # Extra roles are ignored
            if order_by_expression is not None:
                self._order_by_columns.insert(0, order_by_expression)
            else:
                self._order_by_columns.insert(0, self.roles['value'])

        # Add all the other columns in sorted order of role
        for k in sorted(self.roles.keys()):
            if k in ('id', 'value'):
                continue
            self.columns.append(self.roles[k])
            self.group_by.append(self.roles[k])
            self.role_keys.append(k)

        if 'lookup' in kwargs:
            self.lookup = kwargs.get('lookup')
            if not isinstance(self.lookup, dict):
                raise BadIngredient('lookup must be a dictionary')
            # Work on a private list: the caller's ``formatters`` object
            # must not be mutated, and it may be a tuple (which the base
            # initializer accepts but which has no ``insert``).
            self.formatters = list(self.formatters)
            # Inject a formatter that performs the lookup
            if 'lookup_default' in kwargs:
                self.lookup_default = kwargs.get('lookup_default')
                self.formatters.insert(
                    0,
                    lambda value: self.lookup.get(value, self.lookup_default)
                )
            else:
                # Values missing from the lookup are passed through as-is
                self.formatters.insert(
                    0, lambda value: self.lookup.get(value, value)
                )

    @property
    def cauldron_extras(self):
        """Yield extra tuples containing a field name and a callable that
        takes a row.

        Yields everything the base class yields, followed by an
        ``{id}_id`` field read from the row attribute named by
        ``id_prop``.
        """
        # This will format the value field
        for extra in super(Dimension, self).cauldron_extras:
            yield extra

        yield self.id + '_id', lambda row: getattr(row, self.id_prop)

    @property
    def order_by_columns(self):
        """Yield columns to be used in an order by using this ingredient.

        Columns are wrapped in ``.desc()`` when ``ordering`` is 'desc'.
        """
        for c in self._order_by_columns:
            if self.ordering == 'desc':
                yield c.desc()
            else:
                yield c

    def make_column_suffixes(self):
        """Make sure we have the right column suffixes. These will be
        appended to `id` when generating the query.

        The value role gets ``'_raw'`` when formatters exist and ``''``
        otherwise; every other role gets ``'_' + role``.
        """
        if self.formatters:
            value_suffix = '_raw'
        else:
            value_suffix = ''

        return tuple(
            value_suffix if role == 'value' else '_' + role
            for role in self.role_keys
        )

    @property
    def id_prop(self):
        """The label of this dimensions id in the query columns."""
        if 'id' in self.role_keys:
            return self.id + '_id'
        else:
            # Use the value dimension
            if self.formatters:
                return self.id + '_raw'
            else:
                return self.id
class IdValueDimension(Dimension):
    """
    DEPRECATED: A convenience class for creating a Dimension
    with a separate ``id_expression``. The following are identical.

    .. code:: python

        d = Dimension(Student.student_name,
                      id_expression=Student.student_id)

        d = IdValueDimension(Student.student_id, Student.student_name)

    The former approach is recommended.

    Args:

        id_expression (:obj:`ColumnElement`)
            A column expression that is used to identify the id
            for a Dimension
        value_expression (:obj:`ColumnElement`)
            A column expression that is used to identify the value
            for a Dimension
    """

    def __init__(self, id_expression, value_expression, **kwargs):
        # The positional id expression overrides any ``id_expression``
        # keyword that was passed.
        kwargs.update(id_expression=id_expression)
        super(IdValueDimension, self).__init__(value_expression, **kwargs)
class LookupDimension(Dimension):
    """DEPRECATED Returns the expression value looked up in a lookup
    dictionary.
    """

    def __init__(self, expression, lookup, **kwargs):
        """A Dimension that replaces values using a lookup table.

        :param expression: The dimension field
        :type value: object
        :param lookup: A dictionary of key/value pairs. Keys found in the
          value of this Dimension are replaced by the matching values
        :type operator: dict
        :param default: The value to use if a dimension value isn't
          found in the lookup table.  The default behavior is to
          show the original value if the value isn't found in the
          lookup table.
        :type default: object
        """
        # Translate the legacy ``default`` keyword into ``lookup_default``
        try:
            fallback = kwargs.pop('default')
        except KeyError:
            pass
        else:
            kwargs['lookup_default'] = fallback
        kwargs['lookup'] = lookup

        super(LookupDimension, self).__init__(expression, **kwargs)
class Metric(Ingredient):
    """A simple metric created from a single expression."""

    def __init__(self, expression, **kwargs):
        super(Metric, self).__init__(**kwargs)
        # The single expression replaces whatever ``columns`` the base
        # initializer stored.
        self.columns = [expression]

    def build_filter(self, value, operator=None):
        """Building filters with Metric returns Having objects.

        Args:
            value: A value or list of values to operate against.
            operator (:obj:`str`): An optional comparison operator,
                passed through to ``Ingredient.build_filter``.

        Returns:
            A Having object wrapping the first expression of the filter
            built by the base class.
        """
        # Use the explicit two-argument super() like the rest of this
        # module; the zero-argument form only works on Python 3.
        f = super(Metric, self).build_filter(value, operator=operator)
        return Having(f.filters[0])
class DivideMetric(Metric):
    """A metric that divides a numerator by a denominator, guarding
    against division by zero.

    By default a small ``epsilon`` is added to the denominator. Passing
    ``ifzero`` supplies a different value to return when the denominator
    is zero.
    """

    def __init__(self, numerator, denominator, **kwargs):
        ifzero = kwargs.pop('ifzero', 'epsilon')
        epsilon = kwargs.pop('epsilon', 0.000000001)

        if ifzero == 'epsilon':
            # Nudge the (null-safe) denominator by epsilon so the division
            # can never hit zero.
            padded_denominator = (
                func.coalesce(cast(denominator, Float), 0.0) + epsilon
            )
            expression = cast(numerator, Float) / padded_denominator
        else:
            # Return ifzero for a zero denominator, otherwise divide.
            denominator_is_zero = cast(denominator, Float) == 0.0
            quotient = cast(numerator, Float) / cast(denominator, Float)
            expression = case(
                ((denominator_is_zero, ifzero),),
                else_=quotient
            )

        super(DivideMetric, self).__init__(expression, **kwargs)
class WtdAvgMetric(DivideMetric):
    """A metric computing the weighted average of an expression:
    ``sum(expression * weight) / sum(weight)``.
    """

    def __init__(self, expression, weight_expression, **kwargs):
        weighted_total = func.sum(expression * weight_expression)
        total_weight = func.sum(weight_expression)
        super(WtdAvgMetric, self).__init__(
            weighted_total, total_weight, **kwargs
        )