from functools import total_ordering
from uuid import uuid4
from sqlalchemy import Float, and_, between, case, cast, func, or_
from recipe.compat import str
from recipe.exceptions import BadIngredient
from recipe.utils import AttrDict
@total_ordering
class Ingredient(object):
    """Ingredients combine to make a SQLAlchemy query.

    Any unknown keyword arguments provided to an Ingredient
    during initialization are stored in a meta object.

    .. code:: python

        # icon is an unknown keyword argument
        m = Metric(func.sum(MyTable.sales), icon='cog')
        print(m.meta.icon)
        >>> 'cog'

    This meta storage can be used to add new capabilities to
    ingredients.

    Args:

        id (:obj:`str`):
            An id to identify this Ingredient.  If ingredients are
            added to a Shelf, the id is automatically set as the key in
            the shelf. When omitted, a random 12 character hex id is used.
        columns (:obj:`list` of :obj:`ColumnElement`):
            A list of SQLAlchemy columns to use in a query select.
        filters (:obj:`list` of :obj:`BinaryExpression`):
            A list of SQLAlchemy BinaryExpressions to use in the
            .filter() clause of a query.
        havings (:obj:`list` of :obj:`BinaryExpression`):
            A list of SQLAlchemy BinaryExpressions to use in the
            .having() clause of a query.
        group_by (:obj:`list` of :obj:`ColumnElement`):
            A list of SQLAlchemy columns to use in the `group_by` clause
            of a query.
        formatters: (:obj:`list` of :obj:`callable`):
            A list of callables to apply to the result values.
            If formatters exist, property `{ingredient.id}_raw` will
            exist on each result row containing the unformatted
            value.
        column_suffixes (:obj:`list` or :obj:`tuple` of :obj:`str`):
            Optional suffixes appended to ``id`` when labeling each
            column in the query. When supplied, there must be exactly
            one suffix per column.
        cache_context (:obj:`str`):
            Extra context when caching this ingredient.  DEPRECATED
        ordering (`string`, 'asc' or 'desc'):
            One of 'asc' or 'desc'. 'asc' is the default value.
            The default ordering of this ingredient if it is
            used in a ``recipe.order_by``.
            This is added to the ingredient when the ingredient is
            used in a ``recipe.order_by``.
        quickselects (:obj:`list` of named filters):
            A list of named filters that can be accessed through
            ``build_filter``. Named filters are dictionaries with
            a ``name`` (:obj:str) property and a ``condition`` property
            (:obj:`BinaryExpression`)

    Returns:
        An Ingredient object.

    Raises:
        BadIngredient: If ``formatters`` is not a list or tuple, or if
            ``column_suffixes`` is supplied with a length different from
            ``columns``.
    """

    def __init__(self, **kwargs):
        self.id = kwargs.pop("id", uuid4().hex[:12])
        self.columns = kwargs.pop("columns", [])
        self.filters = kwargs.pop("filters", [])
        self.havings = kwargs.pop("havings", [])
        self.group_by = kwargs.pop("group_by", [])
        self.formatters = kwargs.pop("formatters", [])
        self.quickselects = kwargs.pop("quickselects", [])
        self.column_suffixes = kwargs.pop("column_suffixes", None)
        self.cache_context = kwargs.pop("cache_context", "")
        self.anonymize = False

        # What order should this be in
        self.ordering = kwargs.pop("ordering", "asc")

        if not isinstance(self.formatters, (list, tuple)):
            raise BadIngredient(
                "formatters passed to an ingredient must be a list or tuple"
            )

        # If explicit suffixes are passed in, there must be one for each column
        if self.column_suffixes is not None and len(self.column_suffixes) != len(
            self.columns
        ):
            raise BadIngredient("column_suffixes must be the same length as columns")

        # Any remaining passed properties are available in self.meta
        self.meta = AttrDict(kwargs)

    # NOTE(review): __hash__ is derived from describe() (class name, id and
    # column text) while __eq__ compares only (type rank, id). Two ingredients
    # that compare equal can therefore hash differently. Left unchanged
    # because callers may rely on the current behaviour.
    def __hash__(self):
        return hash(self.describe())

    def __repr__(self):
        return self.describe()

    def _stringify(self):
        """Return a relevant string based on ingredient type for repr and
        ordering. Ingredients with the same classname, id and _stringify
        value are considered the same."""
        return " ".join(str(col) for col in self.columns)

    def describe(self):
        """A string representation of the ingredient."""
        return u"({}){} {}".format(self.__class__.__name__, self.id, self._stringify())

    def _format_value(self, value):
        """Formats value by applying each stored formatter in order."""
        for f in self.formatters:
            value = f(value)
        return value

    def make_column_suffixes(self):
        """Make sure we have the right column suffixes. These will be appended
        to `id` when generating the query.

        Returns:
            The explicit ``column_suffixes`` when set; otherwise ``()`` for
            no columns, ``("_raw",)`` for one column with formatters and
            ``("",)`` for one column without formatters.

        Raises:
            BadIngredient: If there is more than one column and no explicit
                ``column_suffixes`` were supplied.
        """
        if self.column_suffixes:
            return self.column_suffixes

        if len(self.columns) == 0:
            return ()
        elif len(self.columns) == 1:
            if self.formatters:
                return ("_raw",)
            else:
                return ("",)
        else:
            raise BadIngredient(
                "column_suffixes must be supplied if there is more than one column"
            )

    @property
    def query_columns(self):
        """Yield labeled columns to be used as a select in a query."""
        for column, suffix in zip(self.columns, self.make_column_suffixes()):
            yield column.label(self.id + suffix)

    @property
    def order_by_columns(self):
        """Yield columns to be used in an order by using this ingredient."""
        for c in self.columns:
            if self.ordering == "desc":
                yield c.desc()
            else:
                yield c

    @property
    def cauldron_extras(self):
        """Yield extra tuples containing a field name and a callable that takes
        a row.

        Only yields when formatters exist: the callable reads the
        ``{id}_raw`` attribute of the row and returns the formatted value.
        """
        if self.formatters:
            raw_property = self.id + "_raw"
            yield self.id, lambda row: self._format_value(getattr(row, raw_property))

    def _order(self):
        """Ingredients are sorted by subclass then by id."""
        if isinstance(self, Dimension):
            return (0, self.id)
        elif isinstance(self, Metric):
            return (1, self.id)
        elif isinstance(self, Filter):
            return (2, self.id)
        elif isinstance(self, Having):
            return (3, self.id)
        else:
            return (4, self.id)

    def __lt__(self, other):
        """Make ingredients sortable."""
        if not isinstance(other, Ingredient):
            # Let Python try the reflected operation / raise TypeError
            return NotImplemented
        return self._order() < other._order()

    def __eq__(self, other):
        """Ingredients are equal when their type rank and id match."""
        if not isinstance(other, Ingredient):
            # Comparing with a foreign object is "not equal", not an error
            return NotImplemented
        return self._order() == other._order()

    def __ne__(self, other):
        """Inverse of ``__eq__`` (defined explicitly for Python 2)."""
        if not isinstance(other, Ingredient):
            return NotImplemented
        return self._order() != other._order()

    def _build_scalar_filter(self, value, operator=None):
        """Build a Filter given a single value.

        Args:

            value (a string, number, boolean or None):
            operator (`str`)
                A valid scalar operator. The default operator
                is `eq`

        Returns:

            A Filter object

        Raises:
            ValueError: If the operator is unknown, or if a ``quickselect``
                name is not present in this ingredient's quickselects.
        """
        filter_column = self.columns[0]

        if operator is None or operator == "eq":
            # Default operator is 'eq' so if no operator is provided, handle
            # like an 'eq'
            if value is None:
                return Filter(filter_column.is_(value))
            else:
                return Filter(filter_column == value)
        if operator == "ne":
            return Filter(filter_column != value)
        elif operator == "lt":
            return Filter(filter_column < value)
        elif operator == "lte":
            return Filter(filter_column <= value)
        elif operator == "gt":
            return Filter(filter_column > value)
        elif operator == "gte":
            return Filter(filter_column >= value)
        elif operator == "is":
            return Filter(filter_column.is_(value))
        elif operator == "isnot":
            return Filter(filter_column.isnot(value))
        elif operator == "like":
            return Filter(filter_column.like(value))
        elif operator == "ilike":
            return Filter(filter_column.ilike(value))
        elif operator == "quickselect":
            for qs in self.quickselects:
                if qs.get("name") == value:
                    return Filter(qs.get("condition"))
            raise ValueError(
                "quickselect {} was not found in "
                "ingredient {}".format(value, self.id)
            )
        else:
            raise ValueError("Unknown operator {}".format(operator))

    def _build_vector_filter(self, value, operator=None):
        """Build a Filter given a list of values.

        Args:

            value (a list or tuple of strings, numbers, booleans or None):
            operator (:obj:`str`)
                A valid vector operator. The default operator is
                `in`.

        Returns:

            A Filter object

        Raises:
            ValueError: If the operator is unknown, if ``between`` is not
                given exactly two values, or if a ``quickselect`` name is
                not present in this ingredient's quickselects.
        """
        filter_column = self.columns[0]

        if operator is None or operator == "in":
            # Default operator is 'in' so if no operator is provided, handle
            # like an 'in'
            if None in value:
                # None can't be matched by IN; split it into an IS NULL test
                non_none_value = sorted([v for v in value if v is not None])
                if non_none_value:
                    return Filter(
                        or_(filter_column.is_(None), filter_column.in_(non_none_value))
                    )
                else:
                    return Filter(filter_column.is_(None))
            else:
                # Sort to generate deterministic query sql for caching
                value = sorted(value)
                return Filter(filter_column.in_(value))
        elif operator == "notin":
            if None in value:
                # None can't be matched by NOT IN; split it into IS NOT NULL
                non_none_value = sorted([v for v in value if v is not None])
                if non_none_value:
                    return Filter(
                        and_(
                            filter_column.isnot(None),
                            filter_column.notin_(non_none_value),
                        )
                    )
                else:
                    return Filter(filter_column.isnot(None))
            else:
                # Sort to generate deterministic query sql for caching
                value = sorted(value)
                return Filter(filter_column.notin_(value))
        elif operator == "between":
            if len(value) != 2:
                # Previously this exception was constructed but never raised,
                # so callers saw an unrelated tuple-unpacking error instead.
                raise ValueError(
                    "When using between, you can only supply a "
                    "lower and upper bounds."
                )
            lower_bound, upper_bound = value
            return Filter(between(filter_column, lower_bound, upper_bound))
        elif operator == "quickselect":
            qs_conditions = []
            for v in value:
                qs_found = False
                for qs in self.quickselects:
                    if qs.get("name") == v:
                        qs_found = True
                        qs_conditions.append(qs.get("condition"))
                        break
                if not qs_found:
                    # Report the specific name that is missing, not the
                    # whole list of requested names.
                    raise ValueError(
                        "quickselect {} was not found in "
                        "ingredient {}".format(v, self.id)
                    )
            return Filter(or_(*qs_conditions))
        else:
            raise ValueError("Unknown operator {}".format(operator))

    def build_filter(self, value, operator=None):
        """
        Builds a filter based on a supplied value and optional operator. If
        no operator is supplied an ``in`` filter will be used for a list and a
        ``eq`` filter if we get a scalar value.

        ``build_filter`` is used by the AutomaticFilter extension.

        Args:

            value:
                A value or list of values to operate against
            operator (:obj:`str`)
                An operator that determines the type of comparison
                to do against value.

                The default operator is 'in' if value is a list and
                'eq' if value is a string, number, boolean or None.

        Returns:

            A Filter object
        """
        # Only lists and tuples are treated as vectors; everything else
        # (including strings) is a scalar.
        value_is_scalar = not isinstance(value, (list, tuple))

        if value_is_scalar:
            return self._build_scalar_filter(value, operator=operator)
        else:
            return self._build_vector_filter(value, operator=operator)

    @property
    def expression(self):
        """An accessor for the SQLAlchemy expression representing this
        Ingredient: the first column, or None when there are no columns."""
        if self.columns:
            return self.columns[0]
        else:
            return None
class Filter(Ingredient):
    """An ingredient wrapping exactly one filter expression.

    Args:
        expression: The expression stored as the single entry of
            ``self.filters``. Any ``filters`` keyword argument is
            overwritten by it.
        **kwargs: Forwarded unchanged to ``Ingredient.__init__``.
    """

    def __init__(self, expression, **kwargs):
        super(Filter, self).__init__(**kwargs)
        # The positional expression always wins over a ``filters`` kwarg.
        self.filters = [expression]

    def _stringify(self):
        # Describe a filter by its filter expressions rather than columns.
        rendered = [str(expr) for expr in self.filters]
        return " ".join(rendered)

    @property
    def expression(self):
        """The first filter expression, or None when there are none."""
        return self.filters[0] if self.filters else None
class Having(Ingredient):
    """An ingredient that limits results with one aggregate boolean clause.

    Args:
        expression: The expression stored as the single entry of
            ``self.havings``. Any ``havings`` keyword argument is
            overwritten by it.
        **kwargs: Forwarded unchanged to ``Ingredient.__init__``.
    """

    def __init__(self, expression, **kwargs):
        super(Having, self).__init__(**kwargs)
        # The positional expression always wins over a ``havings`` kwarg.
        self.havings = [expression]

    def _stringify(self):
        # Describe a having by its having expressions rather than columns.
        rendered = [str(expr) for expr in self.havings]
        return " ".join(rendered)

    @property
    def expression(self):
        """The first having expression, or None when there are none."""
        return self.havings[0] if self.havings else None
class Dimension(Ingredient):
    """A Dimension is an Ingredient that adds columns and groups by those
    columns. Columns should be non-aggregate SQLAlchemy expressions.

    The required expression supplies the dimension's value role. Additional
    expressions can be provided in keyword arguments with keys
    that look like "{role}_expression". The role is suffixed to the
    end of the SQL column name.

    For instance, the following

    .. code:: python

        Dimension(Hospitals.name,
                  latitude_expression=Hospitals.lat,
                  longitude_expression=Hospitals.lng,
                  id='hospital')

    would add columns named "hospital", "hospital_latitude", and
    "hospital_longitude" to the recipes results. All three of these expressions
    would be used as group bys.

    The following additional keyword parameters are also supported:

    Args:

        lookup (:obj:`dict`):
            A dictionary that is used to map values to new values.

            Note: Lookup adds a ``formatter`` callable as the first
            item in the list of formatters.
        lookup_default (:obj:`object`)
            A default to show if the value can't be found in the
            lookup dictionary. When omitted, the original value is shown.
        order_by_expression (:obj:`ColumnElement`)
            An optional expression to order by instead of the value
            expression.

    Returns:

        A Dimension object

    Raises:
        BadIngredient: If a ``raw_expression`` keyword is supplied (``raw``
            is a reserved role) or if ``lookup`` is not a dictionary.
    """

    def __init__(self, expression, **kwargs):
        super(Dimension, self).__init__(**kwargs)

        # An optional expression to use instead of the value expression
        # when ordering. Popped first so it is not mistaken for a role.
        order_by_expression = kwargs.pop("order_by_expression", None)

        # We must always have a value role
        self.roles = {"value": expression}
        role_suffix = "_expression"
        for k, v in kwargs.items():
            if not k.endswith(role_suffix):
                continue
            # Remove _expression to get the role
            role = k[: -len(role_suffix)]
            if not role:
                # A bare "_expression" key names no role; ignore it
                continue
            if role == "raw":
                raise BadIngredient("raw is a reserved role in dimensions")
            self.roles[role] = v

        self.columns = []
        self.group_by = []
        self._order_by_columns = []
        self.role_keys = []

        if "id" in self.roles:
            self.columns.append(self.roles["id"])
            self.group_by.append(self.roles["id"])
            self.role_keys.append("id")
            self._order_by_columns.append(self.roles["id"])

        if "value" in self.roles:
            self.columns.append(self.roles["value"])
            self.group_by.append(self.roles["value"])
            self.role_keys.append("value")

            # Order by columns are in order of value, id
            # Extra roles are ignored
            if order_by_expression is not None:
                self._order_by_columns.insert(0, order_by_expression)
            else:
                self._order_by_columns.insert(0, self.roles["value"])

        # Add all the other columns in sorted order of role
        for k in sorted(self.roles.keys()):
            if k in ("id", "value"):
                continue
            self.columns.append(self.roles[k])
            self.group_by.append(self.roles[k])
            self.role_keys.append(k)

        if "lookup" in kwargs:
            self.lookup = kwargs.get("lookup")
            if not isinstance(self.lookup, dict):
                raise BadIngredient("lookup must be a dictionary")

            # Work on a private list: formatters may legally be a tuple
            # (which has no insert) and the caller's own list must not be
            # mutated as a side effect of constructing a Dimension.
            self.formatters = list(self.formatters)

            # Inject a formatter that performs the lookup
            if "lookup_default" in kwargs:
                self.lookup_default = kwargs.get("lookup_default")
                self.formatters.insert(
                    0, lambda value: self.lookup.get(value, self.lookup_default)
                )
            else:
                # Without a default, unknown values pass through unchanged
                self.formatters.insert(0, lambda value: self.lookup.get(value, value))

    @property
    def cauldron_extras(self):
        """Yield extra tuples containing a field name and a callable that takes
        a row.

        Yields the base class extras first, then an ``{id}_id`` entry whose
        callable reads the row attribute named by ``id_prop``.
        """
        # This will format the value field
        for extra in super(Dimension, self).cauldron_extras:
            yield extra

        yield self.id + "_id", lambda row: getattr(row, self.id_prop)

    @property
    def order_by_columns(self):
        """Yield columns to be used in an order by using this ingredient."""
        for c in self._order_by_columns:
            if self.ordering == "desc":
                yield c.desc()
            else:
                yield c

    def make_column_suffixes(self):
        """Make sure we have the right column suffixes. These will be appended
        to `id` when generating the query.

        The value role gets ``"_raw"`` when formatters exist and ``""``
        otherwise; every other role gets ``"_" + role``.
        """
        if self.formatters:
            value_suffix = "_raw"
        else:
            value_suffix = ""

        return tuple(
            value_suffix if role == "value" else "_" + role for role in self.role_keys
        )

    @property
    def id_prop(self):
        """The label of this dimensions id in the query columns."""
        if "id" in self.role_keys:
            return self.id + "_id"
        else:
            # Use the value dimension
            if self.formatters:
                return self.id + "_raw"
            else:
                return self.id
class IdValueDimension(Dimension):
    """
    DEPRECATED: Shortcut for building a Dimension that carries a separate
    ``id_expression``. These two constructions are equivalent:

    .. code:: python

        d = Dimension(Student.student_name, id_expression=Student.student_id)
        d = IdValueDimension(Student.student_id, Student.student_name)

    Prefer the first form.

    Args:

        id_expression (:obj:`ColumnElement`)
            The column expression that identifies the id
            of the Dimension
        value_expression (:obj:`ColumnElement`)
            The column expression that identifies the value
            of the Dimension
    """

    def __init__(self, id_expression, value_expression, **kwargs):
        # Hand the id over as the "id" role; the value stays positional.
        kwargs.update(id_expression=id_expression)
        super(IdValueDimension, self).__init__(value_expression, **kwargs)
class LookupDimension(Dimension):
    """DEPRECATED Returns the expression value looked up in a lookup dictionary
    """

    def __init__(self, expression, lookup, **kwargs):
        """A Dimension that replaces values using a lookup table.

        :param expression: The dimension field
        :type expression: object
        :param lookup: A dictionary of key/value pairs. Dimension values
                       matching a key are replaced by the corresponding
                       value.
        :type lookup: dict
        :param default: The value to use if a dimension value isn't
                        found in the lookup table.  The default behavior is to
                        show the original value if the value isn't found in the
                        lookup table.
        :type default: object
        """
        # Translate the legacy ``default`` keyword into ``lookup_default``,
        # the name the parent class reads.
        try:
            fallback = kwargs.pop("default")
        except KeyError:
            pass
        else:
            kwargs["lookup_default"] = fallback
        kwargs["lookup"] = lookup

        super(LookupDimension, self).__init__(expression, **kwargs)
class Metric(Ingredient):
    """A simple metric created from a single expression.

    Args:
        expression: The expression stored as the single entry of
            ``self.columns``. Any ``columns`` keyword argument is
            overwritten by it.
        **kwargs: Forwarded unchanged to ``Ingredient.__init__``.
    """

    def __init__(self, expression, **kwargs):
        super(Metric, self).__init__(**kwargs)
        self.columns = [expression]

    def build_filter(self, value, operator=None):
        """Building filters with Metric returns Having objects.

        The filter is built by the parent class and its first filter
        expression is rewrapped in a Having.
        """
        # Explicit two-argument super() matches the rest of this module;
        # the zero-argument form is Python 3 only.
        f = super(Metric, self).build_filter(value, operator=operator)
        return Having(f.filters[0])
class DivideMetric(Metric):
    """A metric dividing a numerator by a denominator while guarding
    against division by zero.

    By default a small ``epsilon`` (``0.000000001`` unless overridden) is
    added to the denominator, which is coalesced to ``0.0`` first.

    Passing ``ifzero`` switches strategy: that value is returned when the
    denominator equals zero, otherwise the plain division is performed.
    """

    def __init__(self, numerator, denominator, **kwargs):
        zero_value = kwargs.pop("ifzero", "epsilon")
        tiny = kwargs.pop("epsilon", 0.000000001)

        if zero_value == "epsilon":
            # Nudge the denominator away from zero so the division
            # cannot fail.
            safe_denominator = func.coalesce(cast(denominator, Float), 0.0) + tiny
            expression = cast(numerator, Float) / safe_denominator
        else:
            # Return the caller's value for a zero denominator, the real
            # quotient otherwise.
            denominator_is_zero = cast(denominator, Float) == 0.0
            quotient = cast(numerator, Float) / cast(denominator, Float)
            expression = case(((denominator_is_zero, zero_value),), else_=quotient)

        super(DivideMetric, self).__init__(expression, **kwargs)
class WtdAvgMetric(DivideMetric):
    """A metric producing the average of an expression weighted by a
    second expression: ``sum(expression * weight) / sum(weight)``.
    """

    def __init__(self, expression, weight_expression, **kwargs):
        # Division (and its zero handling) is delegated to DivideMetric.
        weighted_total = func.sum(expression * weight_expression)
        total_weight = func.sum(weight_expression)
        super(WtdAvgMetric, self).__init__(weighted_total, total_weight, **kwargs)