# Source code for zink.zink

# zink.py
from .pipeline import Pseudonymizer

# Module-level Pseudonymizer shared by the convenience functions below so
# that entity-replacement caches persist across calls within the process.
_global_instance = Pseudonymizer()

def redact(
    text,
    categories=None,
    placeholder=None,
    use_cache=True,
    use_json_mapping=True,
    extractor=None,
    merger=None,
    replacer=None,
    # Concurrency-related / advanced parameters:
    auto_parallel=False,
    chunk_size=1000,
    max_workers=4,
):
    """
    Redact sensitive entities in ``text`` via a module-level convenience wrapper.

    When no custom pipeline components are supplied and JSON mapping is
    enabled, the shared global instance is used so its cache is preserved
    across calls; otherwise a fresh ``Pseudonymizer`` is built with the
    given components. If ``auto_parallel`` is True and the text is longer
    than ``chunk_size``, the concurrency-based pipeline is used; otherwise
    single-pass logic applies.
    """
    custom_components = any(
        component is not None for component in (extractor, merger, replacer)
    )
    if custom_components or not use_json_mapping:
        # Custom configuration: build a one-off pipeline instead of the
        # shared cached instance.
        instance = Pseudonymizer(
            use_json_mapping=use_json_mapping,
            extractor=extractor,
            merger=merger,
            replacer=replacer,
        )
    else:
        instance = _global_instance
    return instance.redact(
        text=text,
        categories=categories,
        placeholder=placeholder,
        use_cache=use_cache,
        auto_parallel=auto_parallel,
        chunk_size=chunk_size,
        max_workers=max_workers,
    )
def replace(
    text,
    categories=None,
    user_replacements=None,
    ensure_consistency=True,
    use_cache=True,
    use_json_mapping=True,
    extractor=None,
    merger=None,
    replacer=None,
    auto_parallel=False,
    chunk_size=1000,
    max_workers=4,
):
    """
    Replace sensitive entities in ``text`` via a module-level convenience wrapper.

    Uses the shared global instance (preserving its cache across calls)
    unless custom pipeline components are supplied or JSON mapping is
    disabled, in which case a fresh ``Pseudonymizer`` is constructed.
    """
    custom_components = any(
        component is not None for component in (extractor, merger, replacer)
    )
    if custom_components or not use_json_mapping:
        # Custom configuration: bypass the shared cached instance.
        instance = Pseudonymizer(
            use_json_mapping=use_json_mapping,
            extractor=extractor,
            merger=merger,
            replacer=replacer,
        )
    else:
        instance = _global_instance
    return instance.replace(
        text=text,
        categories=categories,
        user_replacements=user_replacements,
        ensure_consistency=ensure_consistency,
        use_cache=use_cache,
        auto_parallel=auto_parallel,
        chunk_size=chunk_size,
        max_workers=max_workers,
    )
def replace_with_my_data(
    text,
    categories=None,
    user_replacements=None,
    ensure_consistency=True,
    use_json_mapping=True,
    extractor=None,
    merger=None,
    replacer=None,
    # Usually we don't cache user-defined replacements, but concurrency may
    # still be wanted for large texts, so the parallel knobs are exposed:
    auto_parallel=False,
    chunk_size=1000,
    max_workers=4,
):
    """
    Replace entities with caller-supplied data via a module-level wrapper.

    Unlike :func:`replace`, no ``use_cache`` flag is forwarded — user-defined
    replacements typically do not rely on caching — but concurrency is still
    available for large texts when ``auto_parallel`` is True.
    """
    custom_components = any(
        component is not None for component in (extractor, merger, replacer)
    )
    if custom_components or not use_json_mapping:
        # Custom configuration: bypass the shared global instance.
        instance = Pseudonymizer(
            use_json_mapping=use_json_mapping,
            extractor=extractor,
            merger=merger,
            replacer=replacer,
        )
    else:
        instance = _global_instance
    return instance.replace_with_my_data(
        text=text,
        categories=categories,
        user_replacements=user_replacements,
        ensure_consistency=ensure_consistency,
        auto_parallel=auto_parallel,
        chunk_size=chunk_size,
        max_workers=max_workers,
    )