from functools import total_ordering
from uuid import uuid4
from sqlalchemy import Float, and_, between, case, cast, func, or_
from recipe.compat import str
from recipe.exceptions import BadIngredient
from recipe.utils import AttrDict
@total_ordering
class Ingredient(object):
    """Ingredients combine to make a SQLAlchemy query.

    Any unknown keyword arguments provided to an Ingredient
    during initialization are stored in a meta object.

    .. code:: python

        # icon is an unknown keyword argument
        m = Metric(func.sum(MyTable.sales), icon='cog')
        print(m.meta.icon)
        >>> 'cog'

    This meta storage can be used to add new capabilities to
    ingredients.

    Args:

        id (:obj:`str`):
            An id to identify this Ingredient. If ingredients are
            added to a Shelf, the id is automatically set as the key in
            the shelf. When omitted, a random 12 character hex id is
            generated.
        columns (:obj:`list` of :obj:`ColumnElement`):
            A list of SQLAlchemy columns to use in a query select.
        filters (:obj:`list` of :obj:`BinaryExpression`):
            A list of SQLAlchemy BinaryExpressions to use in the
            .filter() clause of a query.
        havings (:obj:`list` of :obj:`BinaryExpression`):
            A list of SQLAlchemy BinaryExpressions to use in the
            .having() clause of a query.
        group_by (:obj:`list` of :obj:`ColumnElement`):
            A list of SQLAlchemy columns to use in the `group_by` clause
            of a query.
        formatters: (:obj:`list` of :obj:`callable`):
            A list of callables to apply to the result values.
            If formatters exist, property `{ingredient.id}_raw` will
            exist on each result row containing the unformatted
            value.
        column_suffixes (:obj:`list` of :obj:`str`):
            Optional explicit suffixes appended to ``id`` to label each
            column. If supplied there must be exactly one suffix per
            column.
        cache_context (:obj:`str`):
            Extra context when caching this ingredient. DEPRECATED
        ordering (`string`, 'asc' or 'desc'):
            One of 'asc' or 'desc'. 'asc' is the default value.
            The default ordering of this ingredient if it is
            used in a ``recipe.order_by``.
        quickfilters (:obj:`list` of named filters):
            A list of named filters that can be accessed through
            ``build_filter``. Named filters are dictionaries with
            a ``name`` (:obj:str) property and a ``condition`` property
            (:obj:`BinaryExpression`)

    Returns:
        An Ingredient object.

    Raises:
        BadIngredient: If ``formatters`` is not a list or tuple, or if
            ``column_suffixes`` is supplied with a different length than
            ``columns``.
    """

    def __init__(self, **kwargs):
        self.id = kwargs.pop('id', uuid4().hex[:12])
        self.columns = kwargs.pop('columns', [])
        self.filters = kwargs.pop('filters', [])
        self.havings = kwargs.pop('havings', [])
        self.group_by = kwargs.pop('group_by', [])
        self.formatters = kwargs.pop('formatters', [])
        self.quickfilters = kwargs.pop('quickfilters', [])
        self.column_suffixes = kwargs.pop('column_suffixes', None)
        self.cache_context = kwargs.pop('cache_context', '')
        self.anonymize = False

        # What order should this be in
        self.ordering = kwargs.pop('ordering', 'asc')

        if not isinstance(self.formatters, (list, tuple)):
            raise BadIngredient(
                'formatters passed to an ingredient must be a '
                'list or tuple'
            )

        # If explicit suffixes are passed in, there must be one for each
        # column
        if self.column_suffixes is not None and \
                len(self.column_suffixes) != len(self.columns):
            raise BadIngredient(
                'column_suffixes must be the same length as '
                'columns'
            )

        # Any remaining passed properties are available in self.meta
        self.meta = AttrDict(kwargs)

    def __hash__(self):
        return hash(self.describe())

    def __repr__(self):
        return self.describe()

    def _stringify(self):
        """Return a relevant string based on ingredient type for repr and
        ordering. Ingredients with the same classname, id and _stringify
        value are considered the same."""
        return ' '.join(str(col) for col in self.columns)

    def describe(self):
        """A string representation of the ingredient.

        The string is built from the class name, the id and the value of
        ``_stringify()``.
        """
        return u'({}){} {}'.format(
            self.__class__.__name__, self.id, self._stringify()
        )

    def _format_value(self, value):
        """Formats value by applying every stored formatter in order."""
        for f in self.formatters:
            value = f(value)
        return value

    def make_column_suffixes(self):
        """Make sure we have the right column suffixes. These will be
        appended to `id` when generating the query.

        Returns:
            The explicit ``column_suffixes`` if any were supplied. Otherwise
            an empty tuple when there are no columns, or a one element
            tuple (``'_raw'`` when formatters exist, ``''`` otherwise) when
            there is a single column.

        Raises:
            BadIngredient: If there is more than one column and no
                explicit suffixes were supplied.
        """
        if self.column_suffixes:
            return self.column_suffixes

        column_count = len(self.columns)
        if column_count == 0:
            return ()
        elif column_count == 1:
            # With formatters the raw database value is labeled `_raw`; the
            # formatted value is produced later (see cauldron_extras).
            if self.formatters:
                return '_raw',
            else:
                return '',
        else:
            raise BadIngredient(
                'column_suffixes must be supplied if there is '
                'more than one column'
            )

    @property
    def query_columns(self):
        """Yield labeled columns to be used as a select in a query.

        Each column is labeled with ``id`` plus its suffix from
        ``make_column_suffixes``.
        """
        for column, suffix in zip(self.columns, self.make_column_suffixes()):
            yield column.label(self.id + suffix)

    @property
    def order_by_columns(self):
        """Yield columns to be used in an order by using this ingredient.

        Columns are yielded descending when ``ordering`` is ``'desc'`` and
        unchanged otherwise.
        """
        for c in self.columns:
            if self.ordering == 'desc':
                yield c.desc()
            else:
                yield c

    @property
    def cauldron_extras(self):
        """Yield extra tuples containing a field name and a callable that
        takes a row.

        Only yields when formatters exist: the callable reads the
        ``{id}_raw`` attribute of the row and returns the formatted value.
        """
        if self.formatters:
            raw_property = self.id + '_raw'
            yield self.id, lambda row: \
                self._format_value(getattr(row, raw_property))

    def _order(self):
        """Ingredients are sorted by subclass then by id.

        Returns:
            A ``(rank, id)`` tuple where rank is 0 for Dimension, 1 for
            Metric, 2 for Filter, 3 for Having and 4 for anything else.
        """
        if isinstance(self, Dimension):
            return (0, self.id)
        elif isinstance(self, Metric):
            return (1, self.id)
        elif isinstance(self, Filter):
            return (2, self.id)
        elif isinstance(self, Having):
            return (3, self.id)
        else:
            return (4, self.id)

    def __lt__(self, other):
        """Make ingredients sortable."""
        # Defer to the other operand rather than failing with an
        # AttributeError when compared against a non-Ingredient.
        if not isinstance(other, Ingredient):
            return NotImplemented
        return self._order() < other._order()

    def __eq__(self, other):
        """Make ingredients sortable."""
        if not isinstance(other, Ingredient):
            return NotImplemented
        return self._order() == other._order()

    def __ne__(self, other):
        """Make ingredients sortable."""
        if not isinstance(other, Ingredient):
            return NotImplemented
        return not (self._order() == other._order())

    def _build_scalar_filter(self, value, operator=None):
        """Build a Filter given a single value.

        Args:

            value (a string, number, boolean or None):
            operator (`str`)
                A valid scalar operator. The default operator
                is `eq`

        Returns:

            A Filter object

        Raises:
            ValueError: If the operator is unknown, or if the operator is
                ``quickfilter`` and no quickfilter named ``value`` exists.
        """
        filter_column = self.columns[0]

        if operator is None or operator == 'eq':
            # Default operator is 'eq' so if no operator is provided, handle
            # like an 'eq'
            if value is None:
                # Comparing to None must use IS rather than =
                return Filter(filter_column.is_(value))
            else:
                return Filter(filter_column == value)
        if operator == 'ne':
            return Filter(filter_column != value)
        elif operator == 'lt':
            return Filter(filter_column < value)
        elif operator == 'lte':
            return Filter(filter_column <= value)
        elif operator == 'gt':
            return Filter(filter_column > value)
        elif operator == 'gte':
            return Filter(filter_column >= value)
        elif operator == 'is':
            return Filter(filter_column.is_(value))
        elif operator == 'isnot':
            return Filter(filter_column.isnot(value))
        elif operator == 'like':
            return Filter(filter_column.like(value))
        elif operator == 'ilike':
            return Filter(filter_column.ilike(value))
        elif operator == 'quickfilter':
            # value is the name of a quickfilter; use the first match
            for qf in self.quickfilters:
                if qf.get('name') == value:
                    return Filter(qf.get('condition'))
            raise ValueError(
                'quickfilter {} was not found in '
                'ingredient {}'.format(value, self.id)
            )
        else:
            raise ValueError('Unknown operator {}'.format(operator))

    def _build_vector_filter(self, value, operator=None):
        """Build a Filter given a list of values.

        Args:

            value (a list of strings, numbers, booleans or None):
            operator (:obj:`str`)
                A valid vector operator. The default operator is
                `in`.

        Returns:

            A Filter object

        Raises:
            ValueError: If the operator is unknown, or if the operator is
                ``between`` and ``value`` does not contain exactly two
                items.
        """
        filter_column = self.columns[0]

        if operator is None or operator == 'in':
            # Default operator is 'in' so if no operator is provided, handle
            # like an 'in'
            if None in value:
                # filter out the Nones; they are matched with IS NULL
                non_none_value = sorted([v for v in value if v is not None])
                if non_none_value:
                    return Filter(
                        or_(
                            filter_column.is_(None),
                            filter_column.in_(non_none_value)
                        )
                    )
                else:
                    return Filter(filter_column.is_(None))
            else:
                # Sort to generate deterministic query sql for caching
                value = sorted(value)
                return Filter(filter_column.in_(value))
        elif operator == 'notin':
            if None in value:
                # filter out the Nones; they are excluded with IS NOT NULL
                non_none_value = sorted([v for v in value if v is not None])
                if non_none_value:
                    return Filter(
                        and_(
                            filter_column.isnot(None),
                            filter_column.notin_(non_none_value)
                        )
                    )
                else:
                    return Filter(filter_column.isnot(None))
            else:
                # Sort to generate deterministic query sql for caching
                value = sorted(value)
                return Filter(filter_column.notin_(value))
        elif operator == 'between':
            if len(value) != 2:
                # Fail with an explicit message instead of an unrelated
                # tuple unpacking error below.
                raise ValueError(
                    'When using between, you can only supply a '
                    'lower and upper bounds.'
                )
            lower_bound, upper_bound = value
            return Filter(between(filter_column, lower_bound, upper_bound))
        else:
            raise ValueError('Unknown operator {}'.format(operator))

    def build_filter(self, value, operator=None):
        """
        Builds a filter based on a supplied value and optional operator. If
        no operator is supplied an ``in`` filter will be used for a list and
        a ``eq`` filter if we get a scalar value.

        ``build_filter`` is used by the AutomaticFilter extension.

        Args:

            value:
                A value or list of values to operate against
            operator (:obj:`str`)
                An operator that determines the type of comparison
                to do against value.

                The default operator is 'in' if value is a list and
                'eq' if value is a string, number, boolean or None.

        Returns:

            A Filter object
        """
        # Lists and tuples are vectors; everything else is a scalar
        value_is_scalar = not isinstance(value, (list, tuple))

        if value_is_scalar:
            return self._build_scalar_filter(value, operator=operator)
        else:
            return self._build_vector_filter(value, operator=operator)

    @property
    def expression(self):
        """An accessor for the SQLAlchemy expression representing this
        Ingredient: the first column, or None if there are no columns."""
        if self.columns:
            return self.columns[0]
        else:
            return None
class Filter(Ingredient):
    """An Ingredient that wraps exactly one filter expression.

    Args:
        expression:
            The expression stored as the only entry of ``filters``. Any
            ``filters`` keyword argument is replaced by it.
    """

    def __init__(self, expression, **kwargs):
        super(Filter, self).__init__(**kwargs)
        # The positional expression always wins over a `filters` kwarg
        self.filters = [expression]

    def _stringify(self):
        """Space separated string form of the stored filters."""
        return ' '.join(map(str, self.filters))

    @property
    def expression(self):
        """The first stored filter expression, or None when there are
        none."""
        return self.filters[0] if self.filters else None
class Having(Ingredient):
    """An Ingredient that wraps exactly one having expression, limiting
    results based on an aggregate boolean clause.

    Args:
        expression:
            The expression stored as the only entry of ``havings``. Any
            ``havings`` keyword argument is replaced by it.
    """

    def __init__(self, expression, **kwargs):
        super(Having, self).__init__(**kwargs)
        # The positional expression always wins over a `havings` kwarg
        self.havings = [expression]

    def _stringify(self):
        """Space separated string form of the stored havings."""
        return ' '.join(map(str, self.havings))

    @property
    def expression(self):
        """The first stored having expression, or None when there are
        none."""
        return self.havings[0] if self.havings else None
class Dimension(Ingredient):
    """A Dimension is an Ingredient that adds columns and groups by those
    columns. Columns should be non-aggregate SQLAlchemy expressions.

    The required expression supplies the dimension's value role. Additional
    expressions can be provided in keyword arguments with keys
    that look like "{role}_expression". The role is suffixed to the
    end of the SQL column name.

    For instance, the following

    .. code:: python

        Dimension(Hospitals.name,
                  latitude_expression=Hospitals.lat,
                  longitude_expression=Hospitals.lng,
                  id='hospital')

    would add columns named "hospital", "hospital_latitude", and
    "hospital_longitude" to the recipes results. All three of these
    expressions would be used as group bys.

    The following additional keyword parameters are also supported:

    Args:

        lookup (:obj:`dict`):
            A dictionary that is used to map values to new values.

            Note: Lookup adds a ``formatter`` callable as the first
            item in the list of formatters.
        lookup_default (:obj:`object`)
            A default to show if the value can't be found in the
            lookup dictionary. Without it, values missing from the lookup
            are shown unchanged.
        order_by_expression (:obj:`ColumnElement`)
            An optional expression to order by instead of the value
            expression.

    Returns:

        A Dimension object

    Raises:
        BadIngredient: If a ``raw_expression`` keyword is supplied (``raw``
            is a reserved role) or if ``lookup`` is not a dictionary.
    """

    def __init__(self, expression, **kwargs):
        super(Dimension, self).__init__(**kwargs)

        # An optional expression to use instead of the value expression
        # when ordering
        order_by_expression = kwargs.pop('order_by_expression', None)

        # We must always have a value role
        self.roles = {'value': expression}
        for k, v in kwargs.items():
            role = None
            if k.endswith('_expression'):
                # Remove _expression to get the role
                role = k[:-11]
            if role:
                if role == 'raw':
                    raise BadIngredient('raw is a reserved role in dimensions')
                self.roles[role] = v

        self.columns = []
        self.group_by = []
        self._order_by_columns = []
        self.role_keys = []

        # The id role, when present, is always the first column
        if 'id' in self.roles:
            self.columns.append(self.roles['id'])
            self.group_by.append(self.roles['id'])
            self.role_keys.append('id')
            self._order_by_columns.append(self.roles['id'])

        if 'value' in self.roles:
            self.columns.append(self.roles['value'])
            self.group_by.append(self.roles['value'])
            self.role_keys.append('value')
            # Order by columns are in order of value, id
            # Extra roles are ignored
            if order_by_expression is not None:
                self._order_by_columns.insert(0, order_by_expression)
            else:
                self._order_by_columns.insert(0, self.roles['value'])

        # Add all the other columns in sorted order of role
        for k in sorted(self.roles.keys()):
            if k in ('id', 'value'):
                continue
            self.columns.append(self.roles[k])
            self.group_by.append(self.roles[k])
            self.role_keys.append(k)

        if 'lookup' in kwargs:
            self.lookup = kwargs.get('lookup')
            if not isinstance(self.lookup, dict):
                raise BadIngredient('lookup must be a dictionary')

            # Build a formatter that performs the lookup
            if 'lookup_default' in kwargs:
                self.lookup_default = kwargs.get('lookup_default')

                def lookup_formatter(value):
                    return self.lookup.get(value, self.lookup_default)
            else:

                def lookup_formatter(value):
                    return self.lookup.get(value, value)

            # Build a new list rather than inserting in place: formatters
            # may be a tuple (which has no insert), and the list passed in
            # by the caller must not be mutated.
            self.formatters = [lookup_formatter] + list(self.formatters)

    @property
    def cauldron_extras(self):
        """Yield extra tuples containing a field name and a callable that
        takes a row.

        Yields the extras of the base Ingredient followed by an
        ``{id}_id`` extra that reads the ``id_prop`` attribute of the row.
        """
        # This will format the value field
        for extra in super(Dimension, self).cauldron_extras:
            yield extra

        yield self.id + '_id', lambda row: getattr(row, self.id_prop)

    @property
    def order_by_columns(self):
        """Yield columns to be used in an order by using this ingredient.

        Uses the order by columns collected at construction; they are
        yielded descending when ``ordering`` is ``'desc'``.
        """
        for c in self._order_by_columns:
            if self.ordering == 'desc':
                yield c.desc()
            else:
                yield c

    def make_column_suffixes(self):
        """Make sure we have the right column suffixes. These will be
        appended to `id` when generating the query.

        Returns:
            A tuple with one suffix per role key: the value role gets
            ``'_raw'`` when formatters exist and ``''`` otherwise; every
            other role gets ``'_' + role``.
        """
        if self.formatters:
            value_suffix = '_raw'
        else:
            value_suffix = ''

        return tuple(
            value_suffix if role == 'value' else '_' + role
            for role in self.role_keys
        )

    @property
    def id_prop(self):
        """The label of this dimensions id in the query columns."""
        if 'id' in self.role_keys:
            return self.id + '_id'
        else:
            # Use the value dimension
            if self.formatters:
                return self.id + '_raw'
            else:
                return self.id
class IdValueDimension(Dimension):
    """
    DEPRECATED: Shortcut for building a Dimension that has a separate
    ``id_expression``. These two statements build the same dimension.

    .. code:: python

        d = Dimension(Student.student_name, id_expression=Student.student_id)
        d = IdValueDimension(Student.student_id, Student.student_name)

    Prefer the first form.

    Args:

        id_expression (:obj:`ColumnElement`)
            The column expression passed on as the ``id_expression``
            keyword of the Dimension
        value_expression (:obj:`ColumnElement`)
            The column expression passed on as the Dimension's value
            expression
    """

    def __init__(self, id_expression, value_expression, **kwargs):
        # Forward the id as the `id_expression` keyword of Dimension
        kwargs.update(id_expression=id_expression)
        super(IdValueDimension, self).__init__(value_expression, **kwargs)
class LookupDimension(Dimension):
    """DEPRECATED Returns the expression value looked up in a lookup
    dictionary
    """

    def __init__(self, expression, lookup, **kwargs):
        """A Dimension that replaces values using a lookup table.

        :param expression: The dimension field
        :type expression: object
        :param lookup: A dictionary of key/value pairs, forwarded to
                       Dimension as the ``lookup`` keyword
        :type lookup: dict
        :param default: Optional keyword. When present it is forwarded to
                        Dimension as ``lookup_default``, the value to use
                        if a dimension value isn't found in the lookup
                        table.
        :type default: object
        """
        # Translate the legacy `default` keyword into `lookup_default`
        try:
            kwargs['lookup_default'] = kwargs.pop('default')
        except KeyError:
            pass
        kwargs['lookup'] = lookup
        super(LookupDimension, self).__init__(expression, **kwargs)
class Metric(Ingredient):
    """A simple metric created from a single expression.

    Args:
        expression:
            The expression stored as the only entry of ``columns``. Any
            ``columns`` keyword argument is replaced by it.
    """

    def __init__(self, expression, **kwargs):
        super(Metric, self).__init__(**kwargs)
        self.columns = [expression]

    def build_filter(self, value, operator=None):
        """Building filters with Metric returns Having objects.

        The filter is built by the base Ingredient and its first filter
        expression is rewrapped in a Having.

        Args:
            value:
                A value or list of values to operate against
            operator (:obj:`str`)
                An operator that determines the type of comparison
                to do against value.

        Returns:
            A Having object
        """
        # Use the explicit two-argument super() like the rest of this
        # module; the zero-argument form does not work on Python 2.
        f = super(Metric, self).build_filter(value, operator=operator)
        return Having(f.filters[0])
class DivideMetric(Metric):
    """A Metric dividing a numerator by a denominator, both cast to Float.

    By default a small ``epsilon`` is added to the denominator (with a
    missing denominator coalesced to 0.0).

    Passing ``ifzero`` switches to a case expression: when the denominator
    equals 0.0 the ``ifzero`` value is used, otherwise the plain division.

    Args:
        numerator: The expression to divide.
        denominator: The expression to divide by.
        ifzero: ``'epsilon'`` (the default) or the value to use when the
            denominator is zero.
        epsilon: The small value added to the denominator; defaults to
            0.000000001.
    """

    def __init__(self, numerator, denominator, **kwargs):
        zero_strategy = kwargs.pop('ifzero', 'epsilon')
        small_value = kwargs.pop('epsilon', 0.000000001)

        if zero_strategy == 'epsilon':
            # Nudge the denominator away from zero by adding epsilon
            safe_denominator = func.coalesce(
                cast(denominator, Float), 0.0
            ) + small_value
            expression = cast(numerator, Float) / safe_denominator
        else:
            # Emit the ifzero value for a zero denominator, otherwise the
            # ordinary division
            zero_branch = (cast(denominator, Float) == 0.0, zero_strategy)
            expression = case(
                (zero_branch,),
                else_=cast(numerator, Float) / cast(denominator, Float)
            )

        super(DivideMetric, self).__init__(expression, **kwargs)
class WtdAvgMetric(DivideMetric):
    """A DivideMetric computing the weighted average of an expression.

    The numerator is the sum of ``expression * weight_expression`` and the
    denominator is the sum of ``weight_expression``.
    """

    def __init__(self, expression, weight_expression, **kwargs):
        super(WtdAvgMetric, self).__init__(
            func.sum(expression * weight_expression),
            func.sum(weight_expression),
            **kwargs
        )