import dateparser
from copy import copy, deepcopy
from datetime import date, datetime
from six import iteritems
from sqlalchemy import (
Float, Integer, String, Table, and_, case, cast, distinct, func, or_
)
from sqlalchemy.ext.declarative import DeclarativeMeta
from sqlalchemy.sql.base import ImmutableColumnCollection
from sqlalchemy.util import lightweight_named_tuple
from sureberus import errors as E
from sureberus import normalize_schema
from yaml import safe_load
from recipe import ingredients
from recipe.compat import basestring
from recipe.exceptions import BadIngredient, BadRecipe
from recipe.ingredients import Dimension, Filter, Ingredient, Metric
from recipe.schemas import (
aggregations, condition_schema, ingredient_schema, shelf_schema,
sqlalchemy_datatypes
)
# Keep module-level references to ``distinct`` and ``case`` so that the
# imports don't get reaped (presumably by unused-import cleanup -- confirm).
# They need to stay in scope for creating Metrics.
_distinct = distinct
_case = case
# Sentinel used as the default of ``Shelf.pop`` so that "no default supplied"
# can be told apart from an explicit default such as ``None``.
_POP_DEFAULT = object()
# Small constant added to denominators to ensure safe (non-zero) division.
SAFE_DIVISON_EPSILON = 0.000000001
def ingredient_class_for_name(class_name):
    """Look up an attribute of the ``recipe.ingredients`` module by name.

    :param class_name: Name of the attribute (normally an ingredient class).
    :return: The attribute, or ``None`` when the module has no such name.
    """
    try:
        return getattr(ingredients, class_name)
    except AttributeError:
        return None
def _find_in_columncollection(columns, name):
    """Return the first column whose ``name`` or ``_label`` equals `name`.

    :param columns: An iterable of column-like objects.
    :param name: The column name or label to look for.
    :return: The first matching column, or ``None`` if nothing matches.
    """
    return next(
        (
            candidate for candidate in columns
            if candidate.name == name or
            getattr(candidate, '_label', None) == name
        ),
        None
    )
def find_column(selectable, name):
    """Find a column named `name` in `selectable`.

    A Recipe is first converted to its subquery. For a declarative ORM class
    the attribute of that name is tried, then the columns of its
    ``__table__``. For an object exposing an immutable ``c`` column
    collection, the attribute on ``c`` is tried, then the collection is
    searched by name or label.

    :param selectable: A Recipe, a declarative ORM class, or an object with a
        ``c`` column collection.
    :param name: The column name (or label) to look up.
    :return: A column object.
    :raises BadIngredient: If no column could be found.
    """
    from recipe import Recipe

    if isinstance(selectable, Recipe):
        selectable = selectable.subquery()

    # Each strategy is a zero-argument callable so that later lookups are
    # only evaluated when the earlier ones found nothing.
    if isinstance(selectable, DeclarativeMeta):
        # Selectable is a mapped ORM class
        strategies = (
            lambda: getattr(selectable, name, None),
            lambda: _find_in_columncollection(
                selectable.__table__.columns, name
            ),
        )
    elif hasattr(selectable, 'c') and isinstance(
        selectable.c, ImmutableColumnCollection
    ):
        # Selectable is a sqlalchemy subquery
        strategies = (
            lambda: getattr(selectable.c, name, None),
            lambda: _find_in_columncollection(selectable.c, name),
        )
    else:
        strategies = ()

    for strategy in strategies:
        found = strategy()
        if found is not None:
            return found

    raise BadIngredient('Can not find {} in {}'.format(name, selectable))
def _convert_date_value(v):
    """Convert `v` to a ``datetime.date``.

    :param v: A ``datetime`` (truncated to its date), a ``date`` (returned
        unchanged) or a string (parsed with ``dateparser`` as English).
    :return: A ``datetime.date``.
    :raises ValueError: If a string can not be parsed, or `v` is of an
        unsupported type.
    """
    # ``datetime`` is a subclass of ``date``, so it has to be tested first;
    # otherwise datetimes would be returned untruncated by the ``date`` check.
    if isinstance(v, datetime):
        return v.date()
    if isinstance(v, date):
        return v
    if isinstance(v, basestring):
        parsed_dt = dateparser.parse(v, languages=['en'])
        if parsed_dt is None:
            raise ValueError('Could not parse date in {}'.format(v))
        return parsed_dt.date()
    raise ValueError('Can not convert {} to date'.format(v))
def _convert_datetime_value(v):
    """Convert `v` to a ``datetime.datetime``.

    :param v: A ``datetime`` (returned unchanged), a ``date`` (converted to
        midnight of that day) or a string (parsed with ``dateparser`` as
        English).
    :return: A ``datetime.datetime``.
    :raises ValueError: If a string can not be parsed, or `v` is of an
        unsupported type.
    """
    if isinstance(v, datetime):
        return v
    if isinstance(v, date):
        return datetime(v.year, v.month, v.day)
    if not isinstance(v, basestring):
        raise ValueError('Can not convert {} to datetime'.format(v))

    parsed = dateparser.parse(v, languages=['en'])
    if parsed is None:
        raise ValueError('Could not parse datetime in {}'.format(v))
    return parsed
def convert_value(field, value):
    """Convert values into something appropriate for this SQLAlchemy data type

    Only fields whose type renders as ``DATE`` or ``DATETIME`` are converted;
    for every other type `value` is returned untouched.

    :param field: A SQLAlchemy expression
    :param value: A value or list/tuple of values
    :return: The converted value, or a list of converted values when a list
        or tuple was passed for a date/datetime field.
    """
    type_name = str(field.type)
    if type_name == 'DATE':
        convert = _convert_date_value
    elif type_name == 'DATETIME':
        convert = _convert_datetime_value
    else:
        return value

    if isinstance(value, (list, tuple)):
        return [convert(item) for item in value]
    return convert(value)
def parse_validated_condition(cond, selectable):
    """ Convert a validated condition into a SQLAlchemy boolean expression

    :param cond: A validated condition dict (or None). An ``and`` / ``or``
        key holds a list of nested conditions; a ``field`` key is combined
        with the ``_op`` and ``_op_value`` keys.
    :param selectable: Where field references are looked up.
    :return: A SQLAlchemy expression, or None when `cond` is None or has
        none of the recognised keys.
    """
    if cond is None:
        return None

    if 'and' in cond:
        return and_(*[
            parse_validated_condition(sub, selectable)
            for sub in cond.get('and', [])
        ])

    if 'or' in cond:
        return or_(*[
            parse_validated_condition(sub, selectable)
            for sub in cond.get('or', [])
        ])

    if 'field' not in cond:
        return None

    column = parse_validated_field(cond.get('field'), selectable)
    op_name = cond.get('_op')
    operand = convert_value(column, cond.get('_op_value'))
    comparator = getattr(column, op_name)
    if op_name == 'between':
        # between takes its two bounds as separate arguments
        return comparator(*operand)
    return comparator(operand)
def parse_unvalidated_condition(cond, selectable):
    """Validate a raw condition, then convert it to a SQLAlchemy expression.

    :param cond: An unvalidated condition (or None).
    :param selectable: Where field references are looked up.
    :return: The parsed condition, or None when `cond` is None.
    :raises BadIngredient: If `cond` does not satisfy ``condition_schema``
        (unknown keys are not allowed).
    """
    if cond is None:
        return None

    try:
        validated = normalize_schema(
            condition_schema, cond, allow_unknown=False
        )
    except E.SureError as err:
        raise BadIngredient(str(err))
    else:
        return parse_validated_condition(validated, selectable)
def parse_unvalidated_field(unvalidated_fld, selectable, aggregated=True):
    """Validate a raw field definition and convert it to an expression.

    The field is wrapped in a minimal ingredient definition so it can be
    normalized with ``ingredient_schema``.

    :param unvalidated_fld: The unvalidated field definition.
    :param selectable: Where field references are looked up.
    :param aggregated: Validate as a ``Metric`` when true, otherwise as a
        ``Dimension``.
    :return: The SQLAlchemy expression for the field.
    :raises BadIngredient: If validation fails.
    """
    wrapper = {
        'field': unvalidated_fld,
        'kind': 'Metric' if aggregated else 'Dimension',
    }
    try:
        validated = normalize_schema(
            ingredient_schema, wrapper, allow_unknown=True
        )
    except E.SureError as err:
        raise BadIngredient(str(err))
    else:
        return parse_validated_field(validated['field'], selectable)
def ingredient_from_unvalidated_dict(unvalidated_ingr, selectable):
    """Validate a raw ingredient definition and build an ingredient from it.

    :param unvalidated_ingr: The unvalidated ingredient dict.
    :param selectable: Where field references are looked up.
    :return: The ingredient built by ``ingredient_from_validated_dict``.
    :raises BadIngredient: If validation against ``ingredient_schema`` fails.
    """
    try:
        validated = normalize_schema(
            ingredient_schema, unvalidated_ingr, allow_unknown=True
        )
    except E.SureError as err:
        raise BadIngredient(str(err))
    else:
        return ingredient_from_validated_dict(validated, selectable)
def parse_validated_field(fld, selectable, use_bucket_labels=True):
    """ Converts a validated field to a sqlalchemy expression.
    Field references are looked up in selectable

    :param fld: A validated field dict (or None).
    :param selectable: Where column references are looked up.
    :param use_bucket_labels: For bucketed fields, emit each bucket's label
        when true; otherwise emit the bucket's position (and 9999 for the
        default bucket).
    :return: None when `fld` is None, a float when the field is flagged as a
        raw value, otherwise a SQLAlchemy expression.
    """
    if fld is None:
        return None

    # Work on a private copy: the raw-value flag is popped below and the
    # caller's definition must stay untouched.
    spec = deepcopy(fld)
    if spec.pop('_use_raw_value', False):
        return float(spec['value'])

    if 'buckets' not in spec:
        expr = find_column(selectable, spec['value'])
    else:
        # Buckets only appear in dimensions
        fallback_label = spec.get(
            'buckets_default_label'
        ) if use_bucket_labels else 9999
        whens = []
        for position, bucket in enumerate(spec.get('buckets', [])):
            bucket_condition = parse_validated_condition(bucket, selectable)
            outcome = bucket.get('label') if use_bucket_labels else position
            whens.append((bucket_condition, outcome))
        expr = case(whens, else_=fallback_label)

    # Map each operator symbol to the method that implements it on the
    # left-hand expression.
    method_names = {
        '+': '__add__',
        '-': '__sub__',
        '/': '__div__',
        '*': '__mul__',
    }
    for step in spec.get('operators', []):
        symbol = step['operator']
        operand = parse_validated_field(step['field'], selectable)
        if symbol == '/':
            # Keep the denominator away from zero (and from NULL)
            operand = func.coalesce(cast(operand, Float), 0.0) \
                + SAFE_DIVISON_EPSILON
        expr = getattr(expr, method_names[symbol])(operand)

    # Apply a condition if it exists
    guard = parse_validated_condition(spec.get('condition', None), selectable)
    if guard is not None:
        expr = case([(guard, expr)])

    # Lookup the aggregation function and apply it
    aggregate = aggregations.get(spec.get('aggregation'))
    expr = aggregate(expr)

    # Optionally cast to one of the known sqlalchemy datatypes
    target_type = sqlalchemy_datatypes.get(spec.get('_cast_to_datatype'))
    if target_type is not None:
        expr = cast(expr, target_type)

    # Optionally replace NULLs with a fixed value
    null_replacement = spec.get('_coalesce_to_value')
    if null_replacement is not None:
        expr = func.coalesce(expr, null_replacement)

    return expr
def ingredient_from_validated_dict(ingr_dict, selectable):
    """ Create an ingredient from an dictionary.
    This object will be deserialized from yaml

    Note that `ingr_dict` is consumed: the ``kind``, ``field``,
    ``divide_by``, ``quickfilters`` and ``extra_fields`` keys are popped and
    the remaining entries are passed to the ingredient class as keyword
    arguments.

    :param ingr_dict: A validated ingredient definition.
    :param selectable: Where field references are looked up.
    :return: An instance of the ingredient class named by ``kind``
        (``Metric`` when no kind is given).
    :raises BadIngredient: If ``kind`` does not name a known class.
    """
    kind = ingr_dict.pop('kind', 'Metric')
    ingredient_cls = ingredient_class_for_name(kind)
    if ingredient_cls is None:
        raise BadIngredient('Unknown ingredient kind')

    field_spec = ingr_dict.pop('field', None)
    denominator_spec = ingr_dict.pop('divide_by', None)

    expression = parse_validated_field(
        field_spec, selectable, use_bucket_labels=True
    )
    if isinstance(field_spec, dict) and 'buckets' in field_spec:
        # Bucketed fields are ordered by bucket position, not by label
        ingr_dict['order_by_expression'] = parse_validated_field(
            field_spec, selectable, use_bucket_labels=False
        )

    if denominator_spec is not None:
        # Perform a divide by zero safe division
        denominator = parse_validated_field(denominator_spec, selectable)
        safe_denominator = (
            func.coalesce(cast(denominator, Float), 0.0) +
            SAFE_DIVISON_EPSILON
        )
        expression = cast(expression, Float) / safe_denominator

    quickfilter_specs = ingr_dict.pop('quickfilters', None) or ()
    ingr_dict['quickfilters'] = [{
        'name':
            qf['name'],
        'condition':
            parse_validated_condition(qf.get('condition', None), selectable),
    } for qf in quickfilter_specs]

    # Each extra field contains a name and a field
    for extra in ingr_dict.pop('extra_fields', []):
        extra_expression = parse_validated_field(
            extra.get('field'), selectable
        )
        ingr_dict[extra.get('name')] = extra_expression

    return ingredient_cls(expression, **ingr_dict)
class Shelf(object):
    """Holds ingredients used by a recipe.

    Can be initialized with no arguments, but also accepts:

    - a dictionary of ingredients as a positional argument
    - ingredients as keyword arguments

    These keyword arguments have special meaning:

    :param select_from: The SQLAlchemy-compatible object which will be queried
        (usually a Table or ORM object).
    :param table: Unused, but stored on the `Meta` attribute.
    :param metadata: Unused, but stored on the `Meta` attribute.
    """

    class Meta:
        # Class-level defaults. __init__ replaces ``Meta`` on every shelf
        # with its own instance and gives it a fresh ``ingredient_order``.
        anonymize = False
        table = None
        select_from = None
        ingredient_order = []
        metadata = None

    def __init__(self, *args, **kwargs):
        self.Meta = type(self).Meta()
        self.Meta.ingredient_order = []
        self.Meta.table = kwargs.pop('table', None)
        self.Meta.select_from = kwargs.pop('select_from', None)
        self.Meta.metadata = kwargs.pop('metadata', None)
        self._ingredients = {}
        self.update(*args, **kwargs)

    # Dict Interface

    def get(self, k, d=None):
        """Return the ingredient stored under `k`, or `d` if there is none.

        When the result is an Ingredient its ``id`` is set to `k` and its
        ``anonymize`` flag is synced with the shelf before it is returned.
        """
        ingredient = self._ingredients.get(k, d)
        if isinstance(ingredient, Ingredient):
            ingredient.id = k
            ingredient.anonymize = self.Meta.anonymize
        return ingredient

    def items(self):
        """Return the ingredient names and values (``dict.items()``)."""
        return self._ingredients.items()

    def values(self):
        """Return the ingredients (``dict.values()``)."""
        return self._ingredients.values()

    def keys(self):
        """Return the ingredient keys (``dict.keys()``)."""
        return self._ingredients.keys()

    def __copy__(self):
        """Return a new shelf holding copies of this shelf's ingredients."""
        meta = copy(self.Meta)
        # copy() is shallow: give the new shelf its own usage-order list so
        # that use() on one shelf does not leak into the other.
        meta.ingredient_order = list(self.Meta.ingredient_order)
        ingredients = copy(self._ingredients)
        new_shelf = type(self)(ingredients)
        new_shelf.Meta = meta
        return new_shelf

    def __iter__(self):
        return iter(self._ingredients)

    def __getitem__(self, key):
        """ Return the ingredient stored under `key`, syncing its anonymize
        property with the shelf. Raises KeyError for unknown keys. """
        ingr = self._ingredients[key]
        # Ensure the ingredient's `anonymize` matches the shelf.

        # TODO: this is nasty, but *somewhat* safe because we are (hopefully)
        # guaranteed to "own" copies of all of our ingredients. It would be
        # much better if Shelf had logic that ran when anonymize is set to
        # update all ingredients. Or better yet, the code that anonymizes
        # queries should just look at the shelf instead of the ingredients.

        # One way in which this is "spooky" is:
        # ingr = shelf['foo']
        # # ingr.anonymize is now False
        # shelf.Meta.anonymize = True
        # # ingr.anonymize is still False
        # shelf['foo']  # ignore result
        # # ingr.anonymize is now True
        ingr.anonymize = self.Meta.anonymize
        return ingr

    def __setitem__(self, key, ingredient):
        """ Store a copy of `ingredient` under `key`, setting the copy's id
        and anonymize property. Raises TypeError for non-Ingredients. """
        # Maintainer's note: try to make all mutation of self._ingredients go
        # through this method, so we can reliably copy & annotate the
        # ingredients that go into the Shelf.
        if not isinstance(ingredient, Ingredient):
            raise TypeError(
                'Can only set Ingredients as items on Shelf. '
                'Got: {!r}'.format(ingredient)
            )
        ingredient_copy = copy(ingredient)
        ingredient_copy.id = key
        ingredient_copy.anonymize = self.Meta.anonymize
        self._ingredients[key] = ingredient_copy

    def __contains__(self, key):
        return key in self._ingredients

    def __len__(self):
        return len(self._ingredients)

    def clear(self):
        """Remove all ingredients from this shelf."""
        self._ingredients.clear()

    def update(self, d=None, **kwargs):
        """Add the ingredients in mapping `d` and in `kwargs` to this shelf.

        Every item goes through ``__setitem__`` so it is copied and
        annotated; keyword arguments are applied after `d`.
        """
        items = []
        if d is not None:
            items = list(d.items())
        for k, v in items + list(kwargs.items()):
            self[k] = v

    def pop(self, k, d=_POP_DEFAULT):
        """Pop an ingredient off of this shelf.

        Raises KeyError when `k` is missing and no default `d` was given.
        """
        if d is _POP_DEFAULT:
            return self._ingredients.pop(k)
        else:
            return self._ingredients.pop(k, d)

    # End dict interface

    def ingredients(self):
        """ Return the ingredients in this shelf in a deterministic order """
        return sorted(self.values())

    @property
    def dimension_ids(self):
        """ Return the ids of the Dimensions on this shelf in the order in
        which they were used."""
        return self._sorted_ingredients([
            d.id for d in self.values() if isinstance(d, Dimension)
        ])

    @property
    def metric_ids(self):
        """ Return the ids of the Metrics on this shelf in the order in which
        they were used. """
        return self._sorted_ingredients([
            d.id for d in self.values() if isinstance(d, Metric)
        ])

    @property
    def filter_ids(self):
        """ Return the ids of the Filters on this shelf in the order in which
        they were used. """
        return self._sorted_ingredients([
            d.id for d in self.values() if isinstance(d, Filter)
        ])

    def _sorted_ingredients(self, ingredients):
        """Sort ingredient ids by the position of their first ``use()``.

        Ids that were never passed to ``use()`` sort with key 9999.

        :param ingredients: A list of ingredient ids.
        :return: A tuple of the ids in usage order.
        """
        # Build the position lookup once instead of scanning the order list
        # for every id; setdefault keeps the *first* position of repeated
        # ids, matching list.index().
        first_use = {}
        for position, used_id in enumerate(self.Meta.ingredient_order):
            first_use.setdefault(used_id, position)

        def sort_key(ingredient_id):
            return first_use.get(ingredient_id, 9999)

        return tuple(sorted(ingredients, key=sort_key))

    def __repr__(self):
        """ A string representation of the ingredients used in a recipe:
        the ``describe()`` output of each ingredient in sorted order, one
        per line.
        """
        return '\n'.join(
            ingredient.describe() for ingredient in sorted(self.values())
        )

    def use(self, ingredient):
        """Add `ingredient` to the shelf under its own id, recording the
        order of use. Raises TypeError for non-Ingredients."""
        if not isinstance(ingredient, Ingredient):
            raise TypeError(
                'Can only set Ingredients as items on Shelf. '
                'Got: {!r}'.format(ingredient)
            )

        # Track the order in which ingredients are added.
        self.Meta.ingredient_order.append(ingredient.id)
        self[ingredient.id] = ingredient

    @classmethod
    def from_config(
        cls,
        obj,
        selectable,
        ingredient_constructor=ingredient_from_validated_dict,
        metadata=None
    ):
        """Create a shelf using a dict shelf definition.

        :param obj: A Python dictionary describing a Shelf.
        :param selectable: A SQLAlchemy Table, a Recipe, a table name, or a
            SQLAlchemy join to select from. A table name may be qualified as
            ``schema.table``; everything before the last dot is used as the
            schema.
        :param ingredient_constructor: Callable invoked as
            ``ingredient_constructor(validated_ingredient, selectable)`` for
            every ingredient in the validated shelf.
        :param metadata: If `selectable` is passed as a table name, then in
            order to introspect its schema, we must have the SQLAlchemy
            MetaData object to associate it with.
        :return: A shelf that contains the ingredients defined in obj.
        :raises BadIngredient: If `obj` does not satisfy ``shelf_schema``.
        """
        from recipe import Recipe
        if isinstance(selectable, Recipe):
            selectable = selectable.subquery()
        elif isinstance(selectable, basestring):
            if '.' in selectable:
                # Split on the last dot only, so multi-part names such as
                # "database.schema.table" no longer fail to unpack.
                schema, tablename = selectable.rsplit('.', 1)
            else:
                schema, tablename = None, selectable

            selectable = Table(
                tablename,
                metadata,
                schema=schema,
                extend_existing=True,
                autoload=True
            )

        try:
            validated_shelf = normalize_schema(
                shelf_schema, obj, allow_unknown=True
            )
        except E.SureError as e:
            raise BadIngredient(str(e))

        d = {}
        for k, v in iteritems(validated_shelf):
            d[k] = ingredient_constructor(v, selectable)
        shelf = cls(d, select_from=selectable)
        return shelf

    @classmethod
    def from_yaml(cls, yaml_str, selectable, **kwargs):
        """ Shim that calls from_validated_yaml.

        This used to call a different implementation of yaml parsing
        """
        return cls.from_validated_yaml(yaml_str, selectable, **kwargs)

    @classmethod
    def from_validated_yaml(cls, yaml_str, selectable, **kwargs):
        """Create a shelf using a yaml shelf definition.

        :param yaml_str: A string containing yaml ingredient definitions.
        :param selectable: A SQLAlchemy Table, a Recipe, or a SQLAlchemy
            join to select from.
        :param kwargs: Passed through to ``from_config``.
        :return: A shelf that contains the ingredients defined in yaml_str.
        """
        obj = safe_load(yaml_str)
        return cls.from_config(obj, selectable, **kwargs)

    def find(self, obj, filter_to_class=Ingredient, constructor=None):
        """
        Find an Ingredient, optionally using the shelf.

        :param obj: A string or Ingredient. A string names an ingredient on
            this shelf; a leading ``-`` requests descending ordering.
        :param filter_to_class: The Ingredient subclass that obj must be an
            instance of
        :param constructor: An optional callable for building Ingredients
            from obj; called as ``constructor(obj, shelf=self)``.
        :return: An Ingredient of subclass `filter_to_class`
        :raises BadRecipe: If the name is not on the shelf, or the result is
            not an instance of `filter_to_class`.
        """
        if callable(constructor):
            obj = constructor(obj, shelf=self)

        if isinstance(obj, basestring):
            set_descending = obj.startswith('-')
            if set_descending:
                obj = obj[1:]

            if obj not in self:
                raise BadRecipe("{} doesn't exist on the shelf".format(obj))

            ingredient = self[obj]
            if not isinstance(ingredient, filter_to_class):
                raise BadRecipe('{} is not a {}'.format(obj, filter_to_class))

            if set_descending:
                # NOTE: self[obj] returns the shelf's stored ingredient, so
                # this also changes the ordering of the ingredient that
                # stays on the shelf.
                ingredient.ordering = 'desc'

            return ingredient
        elif isinstance(obj, filter_to_class):
            return obj
        else:
            raise BadRecipe('{} is not a {}'.format(obj, filter_to_class))

    def brew_query_parts(self):
        """ Make columns, group_bys, filters, havings

        :return: A dict with keys ``columns`` and ``group_bys`` (lists, in
            ``ingredients()`` order) and ``filters`` and ``havings`` (sets).
        """
        columns, group_bys, filters, havings = [], [], set(), set()
        for ingredient in self.ingredients():
            if ingredient.query_columns:
                columns.extend(ingredient.query_columns)
            if ingredient.group_by:
                group_bys.extend(ingredient.group_by)
            if ingredient.filters:
                filters.update(ingredient.filters)
            if ingredient.havings:
                havings.update(ingredient.havings)

        return {
            'columns': columns,
            'group_bys': group_bys,
            'filters': filters,
            'havings': havings,
        }

    def enchant(self, list, cache_context=None):
        """ Add any calculated values to each row of a resultset generating a
        new namedtuple

        :param list: a list of row results
        :param cache_context: optional extra context for caching; when
            truthy, its string form is appended to the ``cache_context`` of
            every Dimension and Metric on the shelf
        :return: a list with ingredient.cauldron_extras added for all
            ingredients (empty when `list` is empty)
        """
        enchantedlist = []
        if list:
            sample_item = list[0]

            # Extra fields to add to each row
            # With extra callables
            extra_fields, extra_callables = [], []

            for ingredient in self.values():
                if not isinstance(ingredient, (Dimension, Metric)):
                    continue
                if cache_context:
                    ingredient.cache_context += str(cache_context)
                for extra_field, extra_callable in ingredient.cauldron_extras:
                    extra_fields.append(extra_field)
                    extra_callables.append(extra_callable)

            # Mixin the extra fields
            keyed_tuple = lightweight_named_tuple(
                'result', sample_item._fields + tuple(extra_fields)
            )

            # Iterate over the results and build a new namedtuple for each row
            for row in list:
                values = row + tuple(fn(row) for fn in extra_callables)
                enchantedlist.append(keyed_tuple(values))

        return enchantedlist
def AutomaticShelf(table):
    """Given a SQLAlchemy Table, automatically generate a Shelf with metrics
    and dimensions based on its schema.

    :param table: A SQLAlchemy Table, or an object with a ``__table__``
        attribute (in which case that table is used).
    :return: A Shelf built from ``introspect_table(table)``.
    """
    if hasattr(table, '__table__'):
        table = table.__table__
    config = introspect_table(table)
    return Shelf.from_config(config, table)
def introspect_table(table):
    """Given a SQLAlchemy Table object, return a Shelf description suitable
    for passing to Shelf.from_config.

    String columns become Dimensions and Integer/Float columns become
    Metrics, each keyed by column name; other columns are left out.

    :param table: An object whose ``columns`` can be iterated.
    :return: A dict mapping column names to ingredient definitions.
    """
    config = {}
    for column in table.columns:
        kind = None
        if isinstance(column.type, String):
            kind = 'Dimension'
        if isinstance(column.type, (Integer, Float)):
            kind = 'Metric'
        if kind is not None:
            config[column.name] = {'kind': kind, 'field': column.name}
    return config