Source code for utool.util_tags
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import six
import re
import operator
from utool import util_inject
print, rrr, profile = util_inject.inject2(__name__)
[docs]def modify_tags(
tags_list,
direct_map=None,
regex_map=None,
regex_aug=None,
delete_unmapped=False,
return_unmapped=False,
return_map=False,
):
import utool as ut
tag_vocab = ut.unique(ut.flatten(tags_list))
alias_map = ut.odict()
if regex_map is not None:
alias_map.update(**ut.build_alias_map(regex_map, tag_vocab))
if direct_map is not None:
alias_map.update(ut.odict(direct_map))
new_tags_list = tags_list
new_tags_list = ut.alias_tags(new_tags_list, alias_map)
if regex_aug is not None:
alias_aug = ut.build_alias_map(regex_aug, tag_vocab)
aug_tags_list = ut.alias_tags(new_tags_list, alias_aug)
new_tags_list = [
ut.unique(t1 + t2) for t1, t2 in zip(new_tags_list, aug_tags_list)
]
unmapped = list(set(tag_vocab) - set(alias_map.keys()))
if delete_unmapped:
new_tags_list = [ut.setdiff(tags, unmapped) for tags in new_tags_list]
toreturn = None
if return_map:
toreturn = (alias_map,)
if return_unmapped:
toreturn = toreturn + (unmapped,)
if toreturn is None:
toreturn = new_tags_list
else:
toreturn = (new_tags_list,) + toreturn
return toreturn
[docs]def tag_coocurrence(tags_list):
import utool as ut
co_occur_list = []
for tags in tags_list:
for combo in ut.combinations(tags, 2):
key = tuple(sorted(combo))
co_occur_list.append(key)
co_occur = ut.dict_hist(co_occur_list, ordered=True)
# co_occur[key] += 1
# co_occur = ut.odict(co_occur)
return co_occur
[docs]def tag_hist(tags_list):
import utool as ut
return ut.dict_hist(ut.flatten(tags_list), ordered=True)
[docs]def build_alias_map(regex_map, tag_vocab):
"""
Constructs explicit mapping. Order of items in regex map matters.
Items at top are given preference.
Example:
>>> # DISABLE_DOCTEST
>>> tags_list = [['t1', 't2'], [], ['t3'], ['t4', 't5']]
>>> tag_vocab = ut.flat_unique(*tags_list)
>>> regex_map = [('t[3-4]', 'A9'), ('t0', 'a0')]
>>> unmapped = list(set(tag_vocab) - set(alias_map.keys()))
"""
import utool as ut
import re
alias_map = ut.odict([])
for pats, new_tag in reversed(regex_map):
pats = ut.ensure_iterable(pats)
for pat in pats:
flags = [re.match(pat, t) for t in tag_vocab]
for old_tag in ut.compress(tag_vocab, flags):
alias_map[old_tag] = new_tag
identity_map = ut.take_column(regex_map, 1)
for tag in ut.filter_Nones(identity_map):
alias_map[tag] = tag
return alias_map
[docs]def alias_tags(tags_list, alias_map):
"""
update tags to new values
Args:
tags_list (list):
alias_map (list): list of 2-tuples with regex, value
Returns:
list: updated tags
CommandLine:
python -m utool.util_tags alias_tags --show
Example:
>>> # DISABLE_DOCTEST
>>> from utool.util_tags import * # NOQA
>>> import utool as ut
>>> tags_list = [['t1', 't2'], [], ['t3'], ['t4', 't5']]
>>> ut.build_alias_map()
>>> result = alias_tags(tags_list, alias_map)
>>> print(result)
"""
def _alias_dict(tags):
tags_ = [alias_map.get(t, t) for t in tags]
return list(set([t for t in tags_ if t is not None]))
tags_list_ = [_alias_dict(tags) for tags in tags_list]
return tags_list_
# def _fix_tags(tags):
# return {six.text_type(t.lower()) for t in tags}
# tags_list_ = list(map(_fix_tags, tags_list))
# re_list = [re.compile(pat) for pat, val in alias_map]
# val_list = ut.take_column(alias_map, 0)
# def _alias_regex(tags):
# new_tags = 0
# for t in tags:
# matched = [re_.match(t) is not None for re_ in re_list]
# matched_idx = ut.where(matched)
# assert len(matched_idx) <= 1, 'more than one tag in %r matched pattern' % (tags,)
# if len(matched_idx) > 0:
# repl_tags = ut.take(val_list, matched_idx)
# new_tags.extend(repl_tags)
# else:
# new_tags.append(t)
# return new_tags
# # tags_list_ = [_alias_regex(tags) for tags in tags_list_]
# return tags_list_
[docs]def filterflags_general_tags(
tags_list,
has_any=None,
has_all=None,
has_none=None,
min_num=None,
max_num=None,
any_startswith=None,
any_endswith=None,
in_any=None,
any_match=None,
none_match=None,
logic='and',
ignore_case=True,
):
r"""
maybe integrate into utool? Seems pretty general
Args:
tags_list (list):
has_any (None): (default = None)
has_all (None): (default = None)
min_num (None): (default = None)
max_num (None): (default = None)
Notes:
in_any should probably be ni_any
TODO: make this function more natural
CommandLine:
python -m utool.util_tags --exec-filterflags_general_tags
python -m utool.util_tags --exec-filterflags_general_tags:0 --helpx
python -m utool.util_tags --exec-filterflags_general_tags:0
python -m utool.util_tags --exec-filterflags_general_tags:0 --none_match n
python -m utool.util_tags --exec-filterflags_general_tags:0 --has_none=n,o
python -m utool.util_tags --exec-filterflags_general_tags:1
python -m utool.util_tags --exec-filterflags_general_tags:2
Ignore:
>>> # ENABLE_DOCTEST
>>> from utool.util_tags import * # NOQA
>>> import utool as ut
>>> tags_list = [['v'], [], ['P'], ['P', 'o'], ['n', 'o'], [], ['n', 'N'], ['e', 'i', 'p', 'b', 'n'], ['q', 'v'], ['n'], ['n'], ['N']]
>>> kwargs = ut.argparse_dict(ut.get_kwdefaults2(filterflags_general_tags), type_hint=list)
>>> print('kwargs = %r' % (kwargs,))
>>> flags = filterflags_general_tags(tags_list, **kwargs)
>>> print(flags)
>>> result = ut.compress(tags_list, flags)
>>> print('result = %r' % (result,))
Ignore:
>>> # ENABLE_DOCTEST
>>> from utool.util_tags import * # NOQA
>>> import utool as ut
>>> tags_list = [['v'], [], ['P'], ['P'], ['n', 'o'], [], ['n', 'N'], ['e', 'i', 'p', 'b', 'n'], ['n'], ['n'], ['N']]
>>> has_all = 'n'
>>> min_num = 1
>>> flags = filterflags_general_tags(tags_list, has_all=has_all, min_num=min_num)
>>> result = ut.compress(tags_list, flags)
>>> print('result = %r' % (result,))
Ignore:
>>> # ENABLE_DOCTEST
>>> from utool.util_tags import * # NOQA
>>> import utool as ut
>>> tags_list = [['vn'], ['vn', 'no'], ['P'], ['P'], ['n', 'o'], [], ['n', 'N'], ['e', 'i', 'p', 'b', 'n'], ['n'], ['n', 'nP'], ['NP']]
>>> kwargs = {
>>> 'any_endswith': 'n',
>>> 'any_match': None,
>>> 'any_startswith': 'n',
>>> 'has_all': None,
>>> 'has_any': None,
>>> 'has_none': None,
>>> 'max_num': 3,
>>> 'min_num': 1,
>>> 'none_match': ['P'],
>>> }
>>> flags = filterflags_general_tags(tags_list, **kwargs)
>>> filtered = ut.compress(tags_list, flags)
>>> result = ('result = %s' % (ut.repr2(filtered),))
result = [['vn', 'no'], ['n', 'o'], ['n', 'N'], ['n'], ['n', 'nP']]
"""
import numpy as np
import utool as ut
def _fix_tags(tags):
if ignore_case:
return set([]) if tags is None else {six.text_type(t.lower()) for t in tags}
else:
return set([]) if tags is None else {six.text_type() for t in tags}
if logic is None:
logic = 'and'
logic_func = {'and': np.logical_and, 'or': np.logical_or,}[logic]
default_func = {'and': np.ones, 'or': np.zeros,}[logic]
tags_list_ = [_fix_tags(tags_) for tags_ in tags_list]
flags = default_func(len(tags_list_), dtype=np.bool)
if min_num is not None:
flags_ = [len(tags_) >= min_num for tags_ in tags_list_]
logic_func(flags, flags_, out=flags)
if max_num is not None:
flags_ = [len(tags_) <= max_num for tags_ in tags_list_]
logic_func(flags, flags_, out=flags)
if has_any is not None:
has_any = _fix_tags(set(ut.ensure_iterable(has_any)))
flags_ = [len(has_any.intersection(tags_)) > 0 for tags_ in tags_list_]
logic_func(flags, flags_, out=flags)
if has_none is not None:
has_none = _fix_tags(set(ut.ensure_iterable(has_none)))
flags_ = [len(has_none.intersection(tags_)) == 0 for tags_ in tags_list_]
logic_func(flags, flags_, out=flags)
if has_all is not None:
has_all = _fix_tags(set(ut.ensure_iterable(has_all)))
flags_ = [
len(has_all.intersection(tags_)) == len(has_all) for tags_ in tags_list_
]
logic_func(flags, flags_, out=flags)
def _test_item(tags_, fields, op, compare):
t_flags = [any([compare(t, f) for f in fields]) for t in tags_]
num_passed = sum(t_flags)
flag = op(num_passed, 0)
return flag
def _flag_tags(tags_list, fields, op, compare):
flags = [_test_item(tags_, fields, op, compare) for tags_ in tags_list_]
return flags
def _exec_filter(flags, tags_list, fields, op, compare):
if fields is not None:
fields = ut.ensure_iterable(fields)
if ignore_case:
fields = [f.lower() for f in fields]
flags_ = _flag_tags(tags_list, fields, op, compare)
logic_func(flags, flags_, out=flags)
return flags
flags = _exec_filter(
flags, tags_list, any_startswith, operator.gt, six.text_type.startswith
)
flags = _exec_filter(flags, tags_list, in_any, operator.gt, operator.contains)
flags = _exec_filter(
flags, tags_list, any_endswith, operator.gt, six.text_type.endswith
)
flags = _exec_filter(
flags, tags_list, any_match, operator.gt, lambda t, f: re.match(f, t)
)
flags = _exec_filter(
flags, tags_list, none_match, operator.eq, lambda t, f: re.match(f, t)
)
return flags
if __name__ == '__main__':
r"""
CommandLine:
python -m utool.util_tags
python -m utool.util_tags --allexamples
"""
import multiprocessing
multiprocessing.freeze_support() # for win32
import utool as ut # NOQA
ut.doctest_funcs()