import importlib
from collections import OrderedDict
from copy import copy
from six import iteritems
from sqlalchemy import Float, Integer, String, and_, case, distinct, func, or_
from sqlalchemy.util import lightweight_named_tuple
from yaml import safe_load
from recipe.compat import basestring
from recipe.exceptions import BadIngredient, BadRecipe
from recipe.ingredients import Dimension, Filter, Ingredient, Metric
from recipe.utils import AttrDict
from recipe.validators import IngredientValidator
# Ensure case and distinct don't get reaped. We need it in scope for
# creating Metrics
_distinct = distinct
_case = case
def ingredient_class_for_name(class_name):
# load the module, will raise ImportError if module cannot be loaded
m = importlib.import_module('recipe.ingredients')
# get the class, will raise AttributeError if class cannot be found
c = getattr(m, class_name, None)
return c
def parse_condition(cond, table, aggregated=False, default_aggregation='sum'):
"""Create a SQLAlchemy clause from a condition."""
if cond is None:
return None
else:
if 'and' in cond:
conditions = [
parse_condition(c, table, aggregated, default_aggregation)
for c in cond['and']
]
return and_(*conditions)
elif 'or' in cond:
conditions = [
parse_condition(c, table, aggregated, default_aggregation)
for c in cond['or']
]
return or_(*conditions)
elif 'field' not in cond:
raise BadIngredient('field must be defined in condition')
field = parse_field(
cond['field'],
table,
aggregated=aggregated,
default_aggregation=default_aggregation
)
if 'in' in cond:
value = cond['in']
if isinstance(value, (dict,)):
raise BadIngredient('value for in must be a list')
condition_expression = getattr(field, 'in_')(tuple(value))
elif 'gt' in cond:
value = cond['gt']
if isinstance(value, (list, dict)):
raise BadIngredient('conditional value must be a scalar')
condition_expression = getattr(field, '__gt__')(value)
elif 'gte' in cond:
value = cond['gte']
if isinstance(value, (list, dict)):
raise BadIngredient('conditional value must be a scalar')
condition_expression = getattr(field, '__ge__')(value)
elif 'lt' in cond:
value = cond['lt']
if isinstance(value, (list, dict)):
raise BadIngredient('conditional value must be a scalar')
condition_expression = getattr(field, '__lt__')(value)
elif 'lte' in cond:
value = cond['lte']
if isinstance(value, (list, dict)):
raise BadIngredient('conditional value must be a scalar')
condition_expression = getattr(field, '__le__')(value)
elif 'eq' in cond:
value = cond['eq']
if isinstance(value, (list, dict)):
raise BadIngredient('conditional value must be a scalar')
condition_expression = getattr(field, '__eq__')(value)
elif 'ne' in cond:
value = cond['ne']
if isinstance(value, (list, dict)):
raise BadIngredient('conditional value must be a scalar')
condition_expression = getattr(field, '__ne__')(value)
else:
raise BadIngredient('Bad condition')
return condition_expression
def tokenize(s):
""" Tokenize a string by splitting it by + and -
>>> tokenize('this + that')
['this', 'PLUS', 'that']
>>> tokenize('this+that')
['this', 'PLUS', 'that']
>>> tokenize('this+that-other')
['this', 'PLUS', 'that', 'MINUS', 'other']
"""
# Crude tokenization
s = s.replace('+', ' PLUS ').replace('-', ' MINUS ') \
.replace('/', ' DIVIDE ').replace('*', ' MULTIPLY ')
words = [w for w in s.split(' ') if w]
return words
def parse_field(fld, table, aggregated=True, default_aggregation='sum'):
""" Parse a field object from yaml into a sqlalchemy expression """
# An aggregation is a callable that takes a single field expression
# None will perform no aggregation
aggregation_lookup = {
'sum': func.sum,
'min': func.min,
'max': func.max,
'avg': func.avg,
'count': func.count,
'count_distinct': lambda fld: func.count(distinct(fld)),
'month': lambda fld: func.date_trunc('month', fld),
'week': lambda fld: func.date_trunc('week', fld),
'year': lambda fld: func.date_trunc('year', fld),
'quarter': lambda fld: func.date_trunc('quarter', fld),
'age': lambda fld: func.date_part('year', func.age(fld)),
None: lambda fld: fld,
}
# Ensure that the dictionary contains:
# {
# 'value': str,
# 'aggregation': str|None,
# 'condition': dict|None
# }
if isinstance(fld, basestring):
fld = {
'value': fld,
}
if not isinstance(fld, dict):
raise BadIngredient('fields must be a string or a dict')
if 'value' not in fld:
raise BadIngredient('fields must contain a value')
if not isinstance(fld['value'], basestring):
raise BadIngredient('field value must be a string')
# Ensure a condition
if 'condition' in fld:
if not isinstance(fld['condition'], dict) and \
not fld['condition'] is None:
raise BadIngredient('condition must be null or an object')
else:
fld['condition'] = None
# Ensure an aggregation
initial_aggregation = default_aggregation if aggregated else None
if 'aggregation' in fld:
if not isinstance(fld['aggregation'], basestring) and \
not fld['aggregation'] is None:
raise BadIngredient('aggregation must be null or an string')
if fld['aggregation'] is None:
fld['aggregation'] = initial_aggregation
else:
fld['aggregation'] = initial_aggregation
value = fld.get('value', None)
if value is None:
raise BadIngredient('field value is not defined')
field_parts = []
for word in tokenize(value):
if word in ('MINUS', 'PLUS', 'DIVIDE', 'MULTIPLY'):
field_parts.append(word)
else:
if hasattr(table, word):
field_parts.append(getattr(table, word))
elif hasattr(table, 'c') and hasattr(table.c, word):
field_parts.append(getattr(table.c, word))
else:
raise BadIngredient(
'{} is not a field in {}'.format(word, table)
)
if len(field_parts) is None:
raise BadIngredient('field is not defined.')
# Fields should have an odd number of parts
if len(field_parts) % 2 != 1:
raise BadIngredient('field does not have the right number of parts')
field = field_parts[0]
if len(field_parts) > 1:
# if we need to add and subtract from the field
# join the field parts into pairs, for instance if field parts is
# [MyTable.first, 'MINUS', MyTable.second, 'PLUS', MyTable.third]
# we will get two pairs here
# [('MINUS', MyTable.second), ('PLUS', MyTable.third)]
for operator, other_field in zip(field_parts[1::2], field_parts[2::2]):
if operator == 'PLUS':
field = field.__add__(other_field)
elif operator == 'MINUS':
field = field.__sub__(other_field)
elif operator == 'DIVIDE':
field = field.__div__(other_field)
elif operator == 'MULTIPLY':
field = field.__mul__(other_field)
else:
raise BadIngredient('Unknown operator {}'.format(operator))
# Handle the aggregator
aggr = fld.get('aggregation', 'sum')
if aggr is not None:
aggr = aggr.strip()
if aggr not in aggregation_lookup:
raise BadIngredient('unknown aggregation {}'.format(aggr))
aggregator = aggregation_lookup[aggr]
condition = parse_condition(
fld.get('condition', None),
table=table,
aggregated=False,
default_aggregation=default_aggregation
)
if condition is not None:
field = case([(condition, field)])
return aggregator(field)
def ingredient_from_dict(ingr_dict, table=''):
"""Create an ingredient from an dictionary.
This object will be deserialized from yaml """
# TODO: This is deprecated in favor of
# ingredient_from_validated_dict
# Describe the required params for each kind of ingredient
# The key is the parameter name, the value is one of
# field: A parse_field with aggregation=False
# aggregated_field: A parse_field with aggregation=True
# condition: A parse_condition
params_lookup = {
'Dimension': {
'field': 'field'
},
'LookupDimension': {
'field': 'field'
},
'IdValueDimension':
OrderedDict(id_field='field', field='field'),
'Metric': {
'field': 'aggregated_field'
},
'DivideMetric':
OrderedDict(
numerator_field='aggregated_field',
denominator_field='aggregated_field'
),
'WtdAvgMetric':
OrderedDict(field='field', weight='field')
}
format_lookup = {
'comma': ',.0f',
'dollar': '$,.0f',
'percent': '.0%',
'comma1': ',.1f',
'dollar1': '$,.1f',
'percent1': '.1%',
'comma2': ',.2f',
'dollar2': '$,.2f',
'percent2': '.2%',
}
kind = ingr_dict.pop('kind', 'Metric')
IngredientClass = ingredient_class_for_name(kind)
if IngredientClass is None:
raise BadIngredient('Unknown ingredient kind')
params = params_lookup.get(kind, {'field': 'field'})
args = []
for k, v in iteritems(params):
# All the params must be in the dict
if k not in ingr_dict:
raise BadIngredient(
'{} must be defined to make a {}'.format(k, kind)
)
if v == 'field':
statement = parse_field(
ingr_dict.pop(k, None), table, aggregated=False
)
elif v == 'aggregated_field':
statement = parse_field(
ingr_dict.pop(k, None), table, aggregated=True
)
elif v == 'condition':
statement = parse_condition(
ingr_dict.pop(k, None), table, aggregated=True
)
else:
raise BadIngredient('Do not know what this is')
args.append(statement)
# Remaining properties in ingr_dict are treated as keyword args
# If the format string exists in format_lookup, use the value otherwise
# use the original format
if 'format' in ingr_dict:
ingr_dict['format'] = format_lookup.get(
ingr_dict['format'], ingr_dict['format']
)
return IngredientClass(*args, **ingr_dict)
def parse_validated_field(fld, table=''):
""" Converts a validated field to sqlalchemy """
aggr_fn = IngredientValidator.aggregation_lookup[fld['aggregation']]
if hasattr(table, fld['value']):
field = getattr(table, fld['value'])
elif hasattr(table, 'c') and hasattr(table.c, fld):
field = getattr(table.c, fld['value'])
else:
raise BadIngredient('{} is not a field in {}'.format(fld, table))
for operator in fld.get('operators', []):
op = operator['operator']
other_field = parse_validated_field(operator['field'], table=table)
field = IngredientValidator.operator_lookup[op](field)(other_field)
condition = fld.get('condition', None)
if condition:
condition = parse_condition(condition, table=table)
field = case([(condition, field)])
field = aggr_fn(field)
return field
def ingredient_from_validated_dict(ingr_dict, table=''):
""" Create an ingredient from an dictionary.
This object will be deserialized from yaml """
validator = IngredientValidator(schema=ingr_dict['kind'])
if not validator.validate(ingr_dict):
raise Exception(validator.errors)
ingr_dict = validator.document
kind = ingr_dict.pop('kind', 'Metric')
IngredientClass = ingredient_class_for_name(kind)
if IngredientClass is None:
raise BadIngredient('Unknown ingredient kind')
args = []
for fld in ingr_dict.pop('_fields', []):
args.append(parse_validated_field(ingr_dict.pop(fld), table=table))
return IngredientClass(*args, **ingr_dict)
[docs]class Shelf(AttrDict):
""" Holds ingredients used by a recipe
Args:
Returns:
A Shelf object
"""
class Meta:
anonymize = False
table = None
def __init__(self, *args, **kwargs):
super(Shelf, self).__init__(*args, **kwargs)
self.Meta.ingredient_order = []
self.Meta.table = kwargs.pop('table', None)
# Set the ids of all ingredients on the shelf to the key
for k, ingredient in self.items():
ingredient.id = k
def get(self, k, d=None):
ingredient = super(Shelf, self).get(k, d)
if isinstance(ingredient, Ingredient):
ingredient.id = k
ingredient.anonymize = self.Meta.anonymize
return ingredient
def __getitem__(self, key):
""" Set the id and anonymize property of the ingredient whenever we
get or set items """
ingredient = super(Shelf, self).__getitem__(key)
ingredient.id = key
ingredient.anonymize = self.Meta.anonymize
return ingredient
def __setitem__(self, key, ingredient):
""" Set the id and anonymize property of the ingredient whenever we
get or set items """
ingredient_copy = copy(ingredient)
ingredient_copy.id = key
ingredient_copy.anonymize = self.Meta.anonymize
super(Shelf, self).__setitem__(key, ingredient_copy)
[docs] def ingredients(self):
""" Return the ingredients in this shelf in a deterministic order """
return sorted(list(self.values()))
@property
def dimension_ids(self):
""" Return the Dimensions on this shelf in the order in which
they were used."""
return tuple(
sorted(
[d.id for d in self.values() if isinstance(d, Dimension)],
key=
lambda id: self.Meta.ingredient_order.index(id) if id in self.Meta.ingredient_order else 9999
)
)
@property
def metric_ids(self):
""" Return the Metrics on this shelf in the order in which
they were used. """
return tuple(
sorted(
[d.id for d in self.values() if isinstance(d, Metric)],
key=
lambda id: self.Meta.ingredient_order.index(id) if id in self.Meta.ingredient_order else 9999
)
)
@property
def filter_ids(self):
""" Return the Filters on this shelf in the order in which
they were used. """
return tuple(
sorted(
[d.id for d in self.values() if isinstance(d, Filter)],
key=
lambda id: self.Meta.ingredient_order.index(id) if id in self.Meta.ingredient_order else 9999
)
)
def __repr__(self):
""" A string representation of the ingredients used in a recipe
ordered by Dimensions, Metrics, Filters, then Havings
"""
lines = []
# sort the ingredients by type
for ingredient in sorted(self.values()):
lines.append(ingredient.describe())
return '\n'.join(lines)
def use(self, ingredient):
# Track the order in which ingredients are added.
self.Meta.ingredient_order.append(ingredient.id)
self[ingredient.id] = ingredient
@classmethod
def from_yaml(cls, yaml_str, table):
from recipe.core import Recipe, make_table
if isinstance(table, Recipe):
table = make_table(table)
obj = safe_load(yaml_str)
d = {}
for k, v in iteritems(obj):
d[k] = ingredient_from_dict(v, table)
shelf = cls(d)
return shelf
@classmethod
def from_validated_yaml(cls, yaml_str, table):
from recipe.core import Recipe, make_table
if isinstance(table, Recipe):
table = make_table(table)
obj = safe_load(yaml_str)
d = {}
for k, v in iteritems(obj):
d[k] = ingredient_from_validated_dict(v, table)
shelf = cls(d)
return shelf
[docs] def find(self, obj, filter_to_class=Ingredient, constructor=None):
"""
Find an Ingredient, optionally using the shelf.
:param obj: A string or Ingredient
:param filter_to_class: The Ingredient subclass that obj must be an
instance of
:param constructor: An optional callable for building Ingredients
from obj
:return: An Ingredient of subclass `filter_to_class`
"""
if callable(constructor):
obj = constructor(obj, shelf=self)
if isinstance(obj, basestring):
set_descending = obj.startswith('-')
if set_descending:
obj = obj[1:]
if obj not in self:
raise BadRecipe("{} doesn't exist on the shelf".format(obj))
ingredient = self[obj]
if not isinstance(ingredient, filter_to_class):
raise BadRecipe('{} is not a {}'.format(obj, filter_to_class))
if set_descending:
ingredient.ordering = 'desc'
return ingredient
elif isinstance(obj, filter_to_class):
return obj
else:
raise BadRecipe(
'{} is not a {}'.format(obj, type(filter_to_class))
)
[docs] def brew_query_parts(self):
""" Make columns, group_bys, filters, havings
"""
columns, group_bys, filters, havings = [], [], set(), set()
for ingredient in self.ingredients():
if ingredient.query_columns:
columns.extend(ingredient.query_columns)
if ingredient.group_by:
group_bys.extend(ingredient.group_by)
if ingredient.filters:
filters.update(ingredient.filters)
if ingredient.havings:
havings.update(ingredient.havings)
return {
'columns': columns,
'group_bys': group_bys,
'filters': filters,
'havings': havings,
}
[docs] def enchant(self, list, cache_context=None):
""" Add any calculated values to each row of a resultset generating a
new namedtuple
:param list: a list of row results
:param cache_context: optional extra context for caching
:return: a list with ingredient.cauldron_extras added for all
ingredients
"""
enchantedlist = []
if list:
sample_item = list[0]
# Extra fields to add to each row
# With extra callables
extra_fields, extra_callables = [], []
for ingredient in self.values():
if not isinstance(ingredient, (Dimension, Metric)):
continue
if cache_context:
ingredient.cache_context += str(cache_context)
for extra_field, extra_callable in ingredient.cauldron_extras:
extra_fields.append(extra_field)
extra_callables.append(extra_callable)
# Mixin the extra fields
keyed_tuple = lightweight_named_tuple(
'result', sample_item._fields + tuple(extra_fields)
)
# Iterate over the results and build a new namedtuple for each row
for row in list:
values = row + tuple(fn(row) for fn in extra_callables)
enchantedlist.append(keyed_tuple(values))
return enchantedlist
[docs]class AutomaticShelf(Shelf):
def __init__(self, table, *args, **kwargs):
d = self._introspect(table)
super(AutomaticShelf, self).__init__(d)
def _introspect(self, table):
""" Build initial shelf using table """
d = {}
for c in table.__table__.columns:
if isinstance(c.type, String):
d[c.name] = Dimension(c)
if isinstance(c.type, (Integer, Float)):
d[c.name] = Metric(func.sum(c))
return d