from __future__ import print_function, absolute_import
import importlib
import re
import string
import unicodedata
from .compat import str as compat_str
from functools import wraps
from pyhash import metro_64
try:
# getfullargspec is not available in python2
# getargspec is deprecated
# try to use getfullargspec if possible
from inspect import isclass, getfullargspec as getargspec
except ImportError:
from inspect import getargspec, isclass
import attr
import sqlalchemy.orm
import sqlparse
from faker import Faker
from faker.providers import BaseProvider
from sqlalchemy.engine.default import DefaultDialect
from sqlalchemy.sql.functions import FunctionElement
from sqlalchemy.sql.sqltypes import Date, DateTime, NullType, String
from recipe.compat import basestring, integer_types, str
# only expose the printing sql function
__all__ = [
"prettyprintable_sql",
"generate_faker_seed",
"clean_unicode",
"FakerAnonymizer",
"FakerFormatter",
]
hash_fn = metro_64()
def generate_faker_seed(value):
"""Generate a seed value for faker. """
if not isinstance(value, compat_str):
value = compat_str(value)
return hash_fn(value)
def recipe_arg(*args):
"""Decorator for recipe builder arguments.
Promotes builder pattern by returning self.
"""
def decorator(func):
@wraps(func)
def wrapper(self, *_args, **_kwargs):
from recipe import Recipe, RecipeExtension, BadRecipe
if isinstance(self, Recipe):
recipe = self
elif isinstance(self, RecipeExtension):
recipe = self.recipe
else:
raise BadRecipe(
"recipe_arg can only be applied to"
"methods of Recipe or RecipeExtension"
)
if recipe._query is not None:
recipe.reset()
func(self, *_args, **_kwargs)
return recipe
return wrapper
return decorator
class TestProvider(BaseProvider):
"""A demo faker provider for testing string providers"""
def foo(self):
return "foo"
class StringLiteral(String):
""" Teach SA how to literalize various things. """
def literal_processor(self, dialect):
super_processor = super(StringLiteral, self).literal_processor(dialect)
def process(value):
if isinstance(value, integer_types):
return str(value)
if not isinstance(value, basestring):
value = str(value)
result = super_processor(value)
if isinstance(result, bytes):
result = result.decode(dialect.encoding)
return result
return process
def prettyprintable_sql(statement, dialect=None, reindent=True):
"""
Generate an SQL expression string with bound parameters rendered inline
for the given SQLAlchemy statement. The function can also receive a
`sqlalchemy.orm.Query` object instead of statement.
WARNING: Should only be used for debugging. Inlining parameters is not
safe when handling user created data.
"""
if isinstance(statement, sqlalchemy.orm.Query):
if dialect is None:
dialect = statement.session.get_bind().dialect
statement = statement.statement
# Generate a class that can handle encoding
if dialect:
DialectKlass = dialect.__class__
else:
DialectKlass = DefaultDialect
class LiteralDialect(DialectKlass):
colspecs = {
# prevent various encoding explosions
String: StringLiteral,
# teach SA about how to literalize a datetime
DateTime: StringLiteral,
Date: StringLiteral,
# don't format py2 long integers to NULL
NullType: StringLiteral,
}
compiled = statement.compile(
dialect=LiteralDialect(), compile_kwargs={"literal_binds": True}
)
return sqlparse.format(str(compiled), reindent=reindent)
WHITESPACE_RE = re.compile(r"\s+", flags=re.DOTALL | re.MULTILINE)
def replace_whitespace_with_space(s):
""" Replace multiple whitespaces with a single space. """
return WHITESPACE_RE.sub(" ", s)
def clean_unicode(value):
try:
cleaned_value = str(value)
except UnicodeEncodeError:
cleaned_value = unicodedata.normalize("NFKD", value).encode("ascii", "ignore")
if not cleaned_value:
raise ValueError("Could not find useful chars in the string")
return cleaned_value
class AttrDict(dict):
def __init__(self, *args, **kwargs):
super(AttrDict, self).__init__(*args, **kwargs)
self.__dict__ = self
def disaggregate(expr):
if isinstance(expr, FunctionElement):
return expr.clause_expr
else:
return expr
class FakerFormatter(string.Formatter):
"""A formatter that can get values from Faker generators."""
def format_field(self, obj, format_spec):
"""
:param obj: A faker Faker
:param format_spec: a generator
:return: A string generated by
"""
generator = format_spec
kwargs = {}
if "|" in format_spec:
try:
newgenerator, potential_kwargs = format_spec.split("|")
for part in potential_kwargs.split(","):
k, v = part.split("=")
if v == "None":
v = None
elif v == "True":
v = True
elif v == "False":
v = False
elif v.isdigit():
v = int(v)
kwargs[k] = v
generator = newgenerator
except ValueError:
# If more than one "|" don't try to parse
# If the kwargs aren't of form x=y then don't try to parse
pass
value = None
if callable(getattr(obj, generator)):
c = getattr(obj, generator)
argspec = getargspec(c)
if len(argspec.args) == 1:
value = getattr(obj, generator)()
elif kwargs:
value = getattr(obj, generator)(**kwargs)
else:
value = c
if value is not None and not isinstance(value, basestring):
value = str(value)
return value or "Unknown fake generator"
[docs]@attr.s
class FakerAnonymizer(object):
"""Returns a deterministically generated fake value that depends on the
input value. """
format_str = attr.ib()
postprocessor = attr.ib()
locale = attr.ib(default="en_US")
postprocessor = attr.ib(default=None)
providers = attr.ib(default=None)
def __attrs_post_init__(self):
self.fake = Faker(self.locale)
self.formatter = FakerFormatter()
for p in self._clean_providers(self.providers):
self.fake.add_provider(p)
def _clean_providers(self, providers):
"""Convert a list of anonymizer providers into classes suitable for
adding with faker.add_provider"""
if not providers:
return []
if not isinstance(providers, (list, tuple)):
providers = [providers]
cleaned_providers = []
for provider in providers:
if isinstance(provider, basestring):
# dynamically import the provider
parts = provider.split(".")
if len(parts) > 1:
_module = ".".join(parts[:-1])
_provider_class = parts[-1]
try:
_mod = importlib.import_module(_module)
_provider = getattr(_mod, _provider_class, None)
if _provider is None:
# TODO: log an issue, provider not found in module
continue
elif not issubclass(_provider, BaseProvider):
# TODO: log an issue, provider not generator
continue
else:
cleaned_providers.append(_provider)
except ImportError:
# TODO: log an issue, can't import module
continue
elif isclass(provider) and issubclass(provider, BaseProvider):
cleaned_providers.append(provider)
else:
# TODO: log an issue, provider is not an importable string
# or a ProviderBase
continue
return cleaned_providers
def __call__(self, value):
self.fake.seed_instance(generate_faker_seed(value))
value = self.formatter.format(self.format_str, fake=self.fake)
if self.postprocessor is None:
return value
else:
return self.postprocessor(value)