Source code for crate_anon.common.stringfunc
#!/usr/bin/env python
# crate_anon/common/stringfunc.py
"""
===============================================================================
Copyright (C) 2015-2018 Rudolf Cardinal (rudolf@pobox.com).
This file is part of CRATE.
CRATE is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
CRATE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with CRATE. If not, see <http://www.gnu.org/licenses/>.
===============================================================================
"""
import fnmatch
from functools import lru_cache
import sys
import regex
# =============================================================================
# Simple string manipulation
# =============================================================================
[docs]def get_digit_string_from_vaguely_numeric_string(s: str) -> str:
"""
Strips non-digit characters from a string.
For example, converts "(01223) 123456" to "01223123456".
"""
return "".join([d for d in s if d.isdigit()])
[docs]def reduce_to_alphanumeric(s: str) -> str:
"""
Strips non-alphanumeric characters from a string.
For example, converts "PE12 3AB" to "PE12 3AB".
"""
return "".join([d for d in s if d.isalnum()])
[docs]def remove_whitespace(s: str) -> str:
"""
Removes whitespace from a string.
"""
return ''.join(s.split())
# =============================================================================
# Specification matching
# =============================================================================
@lru_cache(maxsize=None)
def get_spec_match_regex(spec):
return regex.compile(fnmatch.translate(spec), regex.IGNORECASE)
# =============================================================================
# Printing/encoding
# =============================================================================
[docs]def uprint(*objects, sep=' ', end='\n', file=sys.stdout):
"""
Prints strings to outputs that support UTF-8 encoding, but also to those
that do not (e.g. Windows stdout).
"""
# http://stackoverflow.com/questions/14630288/unicodeencodeerror-charmap-codec-cant-encode-character-maps-to-undefined # noqa
enc = file.encoding
if enc == 'UTF-8':
print(*objects, sep=sep, end=end, file=file)
else:
def f(obj):
return str(obj).encode(enc, errors='backslashreplace').decode(enc)
# https://docs.python.org/3.5/library/codecs.html#codec-base-classes
print(*map(f, objects), sep=sep, end=end, file=file)