Source code for recipe.core

import logging
import time
import warnings
from uuid import uuid4

import tablib
from orderedset import OrderedSet
from sqlalchemy import alias
from sqlalchemy.sql.elements import BinaryExpression
from sureberus import normalize_dict, normalize_schema

from recipe.compat import str
from recipe.dynamic_extensions import run_hooks
from recipe.exceptions import BadRecipe
from recipe.ingredients import Dimension, Filter, Having, Metric
from recipe.shelf import Shelf, parse_condition
from recipe.utils import prettyprintable_sql
from recipe.schemas import recipe_schema

ALLOW_QUERY_CACHING = True

warnings.simplefilter('always', DeprecationWarning)

logger = logging.getLogger(__name__)


class Stats(object):

    def __init__(self):
        self.ready = False

    def set_stats(self, rows, dbtime, enchanttime, from_cache):
        self.ready = True
        self._rows = rows
        self._dbtime = dbtime
        self._enchanttime = enchanttime
        self._from_cache = from_cache

    def _get_value(self, prop):
        if self.ready:
            return getattr(self, prop)
        else:
            raise BadRecipe("Can't access stats before the query has run")

    @property
    def rows(self):
        """ The number of rows in this result. """
        return self._get_value('_rows')

    @property
    def dbtime(self):
        """ The amount of time the database took to process. """
        return self._get_value('_dbtime')

    @property
    def enchanttime(self):
        """ The amount of time the database took to process. """
        return self._get_value('_enchanttime')

    @property
    def from_cache(self):
        """ Was this result cached """
        return self._get_value('_from_cache')


[docs]class Recipe(object): """ A tool for getting data. Args: shelf (Shelf): A shelf to use for shared metrics metrics (:obj:`list` of :obj:`str`) A list of metrics to use from the shelf. These can also be :obj:`Metric` objects. dimensions (:obj:`list` of :obj:`str`) A list of dimensions to use from the shelf. These can also be :obj:`Dimension` objects. filters (:obj:`list` of :obj:`str`) A list of filters to use from the shelf. These can also be :obj:`Filter` objects. order_by (:obj:`list` of :obj:`str`) A list of dimension or metric keys from the shelf to use for ordering. If prefixed by '-' the ordering will be descending. session (:obj:`Session`) A SQLAlchemy database session. extension_classes (:obj:`list` of :obj:`RecipeExtension`) Extensions to apply to this recipe. Returns: A Recipe object. """ def __init__( self, shelf=None, metrics=None, dimensions=None, filters=None, order_by=None, session=None, extension_classes=(), dynamic_extensions=None ): self._select_from = None self._id = str(uuid4())[:8] self.shelf(shelf) # Stores all ingredients used in the recipe self._cauldron = Shelf() self._order_bys = [] self.cache_context = None self.stats = Stats() if metrics is not None: self.metrics(*metrics) if dimensions is not None: self.dimensions(*dimensions) if filters is not None: self.filters(*filters) if order_by is not None: self.order_by(*order_by) self._session = session self._limit = 0 self._offset = 0 self._is_postgres_engine = None # Store cached results in _query and _all # setting dirty to true invalidates these caches self.dirty = True # Have the rows been fetched self.all_dirty = True self._query = None self._all = [] self.recipe_extensions = [ ExtensionClass(self) for ExtensionClass in extension_classes ] self.dynamic_extensions = dynamic_extensions
[docs] @classmethod def from_config(cls, shelf, obj, **kwargs): """ Construct a Recipe from a plain Python dictionary. Most of the directives only support named ingredients, specified as strings, and looked up on the shelf. But filters can be specified as objects. Additionally, each RecipeExtension can extract and handle data from the configuration. """ def subdict(d, keys): new = {} for k in keys: if k in d: new[k] = d[k] return new core_kwargs = subdict(obj, recipe_schema['schema'].keys()) core_kwargs = normalize_schema(recipe_schema, core_kwargs) core_kwargs['filters'] = [ parse_condition(filter, shelf.Meta.select_from) if isinstance(filter, dict) else filter for filter in obj.get('filters', []) ] core_kwargs.update(kwargs) recipe = cls(shelf=shelf, **core_kwargs) # Now let extensions handle their own stuff for ext in recipe.recipe_extensions: additional_schema = getattr(ext, 'recipe_schema', None) if additional_schema is not None: ext_data = subdict(obj, additional_schema.keys()) ext_data = normalize_dict(additional_schema, ext_data) recipe = ext.from_config(ext_data) return recipe
# ------- # Builder for parts of the recipe. # ------- def __getattr__(self, name): """ Return an attribute of self, if not found, proxy to all recipe_extensions :param name: :return: """ try: return self.__getattribute__(name) except AttributeError: pass for extension in self.recipe_extensions: try: proxy_callable = getattr(extension, name) break except AttributeError: pass try: proxy_callable except NameError: raise AttributeError( '{} isn\'t available on this recipe, ' 'you may need to add an extension'.format(name) ) return proxy_callable
[docs] def shelf(self, shelf=None): """ Defines a shelf to use for this recipe """ if shelf is None: self._shelf = Shelf({}) elif isinstance(shelf, Shelf): self._shelf = shelf elif isinstance(shelf, dict): self._shelf = Shelf(shelf) else: raise BadRecipe('shelf must be a dict or recipe.shelf.Shelf') if self._select_from is None and \ self._shelf.Meta.select_from is not None: self._select_from = self._shelf.Meta.select_from return self
[docs] def metrics(self, *metrics): """ Add a list of Metric ingredients to the query. These can either be Metric objects or strings representing metrics on the shelf. The Metric expression will be added to the query's select statement. The metric value is a property of each row of the result. :param metrics: Metrics to add to the recipe. Metrics can either be keys on the ``shelf`` or Metric objects :type metrics: list """ for m in metrics: self._cauldron.use(self._shelf.find(m, Metric)) self.dirty = True return self
@property def metric_ids(self): return self._cauldron.metric_ids
[docs] def dimensions(self, *dimensions): """ Add a list of Dimension ingredients to the query. These can either be Dimension objects or strings representing dimensions on the shelf. The Dimension expression will be added to the query's select statement and to the group_by. :param dimensions: Dimensions to add to the recipe. Dimensions can either be keys on the ``shelf`` or Dimension objects :type dimensions: list """ for d in dimensions: self._cauldron.use(self._shelf.find(d, Dimension)) self.dirty = True return self
@property def dimension_ids(self): return self._cauldron.dimension_ids
[docs] def filters(self, *filters): """ Add a list of Filter ingredients to the query. These can either be Filter objects or strings representing filters on the service's shelf. ``.filters()`` are additive, calling .filters() more than once will add to the list of filters being used by the recipe. The Filter expression will be added to the query's where clause :param filters: Filters to add to the recipe. Filters can either be keys on the ``shelf`` or Filter objects :type filters: list """ def filter_constructor(f, shelf=None): if isinstance(f, BinaryExpression): return Filter(f) else: return f for f in filters: self._cauldron.use( self._shelf.find( f, (Filter, Having), constructor=filter_constructor ) ) self.dirty = True return self
@property def filter_ids(self): return self._cauldron.filter_ids
[docs] def order_by(self, *order_bys): """ Add a list of ingredients to order by to the query. These can either be Dimension or Metric objects or strings representing order_bys on the shelf. The Order_by expression will be added to the query's order_by statement :param order_bys: Order_bys to add to the recipe. Order_bys can either be keys on the ``shelf`` or Dimension or Metric objects. If the key is prefixed by "-" the ordering will be descending. :type order_bys: list """ # Order bys shouldn't be added to the _cauldron self._order_bys = [] for ingr in order_bys: order_by = self._shelf.find(ingr, (Dimension, Metric)) self._order_bys.append(order_by) self.dirty = True return self
def select_from(self, selectable): self.dirty = True self._select_from = selectable return self def session(self, session): self.dirty = True self._session = session return self
[docs] def limit(self, limit): """ Limit the number of rows returned from the database. :param limit: The number of rows to return in the recipe. 0 will return all rows. :type limit: int """ if self._limit != limit: self.dirty = True self._limit = limit return self
[docs] def offset(self, offset): """ Offset a number of rows before returning rows from the database. :param offset: The number of rows to offset in the recipe. 0 will return from the first available row :type offset: int """ if self._offset != offset: self.dirty = True self._offset = offset return self
# ------ # Utility functions # ------ def _is_postgres(self): """ Determine if the running engine is postgres """ if self._is_postgres_engine is None: is_postgres_engine = False try: dialect = self.session.bind.engine.name if 'redshift' in dialect or 'postg' in dialect or 'pg' in \ dialect: is_postgres_engine = True except: pass self._is_postgres_engine = is_postgres_engine return self._is_postgres_engine def _prepare_order_bys(self): """ Build a set of order by columns """ order_bys = OrderedSet() if self._order_bys: for ingredient in self._order_bys: if isinstance(ingredient, Dimension): # Reverse the ordering columns so that dimensions # order by their label rather than their id columns = reversed(ingredient.columns) else: columns = ingredient.columns for c in columns: order_by = c.desc() if ingredient.ordering == 'desc' else c if str(order_by) not in [str(o) for o in order_bys]: order_bys.add(order_by) return list(order_bys)
[docs] def query(self): """ Generates a query using the ingredients supplied by the recipe. :return: A SQLAlchemy query """ if len(self._cauldron.ingredients()) == 0: raise BadRecipe('No ingredients have been added to this recipe') if not self.dirty and self._query: return self._query # Step 1: Gather up global filters and user filters and # apply them as if they had been added to recipe().filters(...) for extension in self.recipe_extensions: extension.add_ingredients() # Step 2: Build the query (now that it has all the filters # and apply any blend recipes # Get the parts of the query from the cauldron # We don't need to regather order_bys recipe_parts = self._cauldron.brew_query_parts() recipe_parts['order_bys'] = self._prepare_order_bys() for extension in self.recipe_extensions: recipe_parts = extension.modify_recipe_parts(recipe_parts) # Start building the query query = self._session.query(*recipe_parts['columns']) if self._select_from is not None: query = query.select_from(self._select_from) recipe_parts['query'] = query \ .group_by(*recipe_parts['group_bys']) \ .order_by(*recipe_parts['order_bys']) \ .filter(*recipe_parts['filters']) if recipe_parts['havings']: for having in recipe_parts['havings']: recipe_parts['query'] = recipe_parts['query'].having(having) for extension in self.recipe_extensions: recipe_parts = extension.modify_prequery_parts(recipe_parts) if self._select_from is None and len( recipe_parts['query'].selectable.froms ) != 1: raise BadRecipe( 'Recipes must use ingredients that all come from ' 'the same table. \nDetails on this recipe:\n{' '}'.format(str(self._cauldron)) ) for extension in self.recipe_extensions: recipe_parts = extension.modify_postquery_parts(recipe_parts) recipe_parts = run_hooks( recipe_parts, 'modify_query', self.dynamic_extensions ) # Apply limit on the outermost query # This happens after building the comparison recipe if self._limit and self._limit > 0: recipe_parts['query'] = recipe_parts['query'].limit(self._limit) if self._offset and self._offset > 0: recipe_parts['query'] = recipe_parts['query'].offset(self._offset) # Step 5: Clear the dirty flag, # Patch the query if there's a comparison query # cache results self._query = recipe_parts['query'] self.dirty = False return self._query
@property def dirty(self): """ The recipe is dirty if it is flagged dirty or any extensions are flagged dirty """ if self._dirty: return True else: for extension in self.recipe_extensions: if extension.dirty: return True return False @dirty.setter def dirty(self, value): """ If dirty is true set the recipe to dirty flag. If false, clear the recipe and all extension dirty flags """ if value: self._dirty = True else: self._dirty = False for extension in self.recipe_extensions: extension.dirty = False def _table(self): """ A convenience method to determine the table the query is selecting from """ descriptions = self.query().column_descriptions if descriptions: return descriptions[0]['entity'] else: return None
[docs] def to_sql(self): """ A string representation of the SQL this recipe will generate. """ return prettyprintable_sql(self.query())
[docs] def subquery(self, name=None): """ The recipe's query as a subquery suitable for use in joins or other queries. """ query = self.query() return query.subquery(name=name)
[docs] def as_table(self, name=None): """ Return an alias to a table """ if name is None: name = self._id return alias(self.subquery(), name=name)
[docs] def all(self): """ Return a (potentially cached) list of result objects. """ starttime = fetchtime = enchanttime = time.time() fetched_from_cache = False if self.dirty or self.all_dirty: query = self.query() self._all = query.all() # If we're using a caching query and that query did not # save new values to cache, we got the cached results # This is not 100% accurate; it only reports if the caching query # attempts to save to cache not the internal state of the cache # and whether the cache save actually occurred. if not getattr(query, 'saved_to_cache', True): fetched_from_cache = True fetchtime = time.time() self._all = self._cauldron.enchant( self._all, cache_context=self.cache_context ) enchanttime = time.time() self.all_dirty = False else: # In this case we are using the object self._all as cache fetched_from_cache = True self.stats.set_stats( len(self._all), fetchtime - starttime, enchanttime - fetchtime, fetched_from_cache ) return self._all
[docs] def one(self): """ Return the first element on the result """ all = self.all() if len(all) > 0: return all[0] else: return []
@property def dataset(self): rows = self.all() if rows: first_row = rows[0] return tablib.Dataset(*rows, headers=first_row._fields) else: return tablib.Dataset([], headers=[])
[docs] def first(self): """ Return the first element on the result """ return self.one()