Source code for utool.util_list

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import operator
import six
import itertools
import warnings
import functools
from math import floor, ceil
from six.moves import zip, map, zip_longest, range, filter, reduce
from utool import util_iter
from utool import util_inject
from utool import util_str
from utool import util_type
from utool._internal.meta_util_six import get_funcname, set_funcname

print, rrr, profile = util_inject.inject2(__name__)

if util_type.HAVE_NUMPY:
    import numpy as np


# --- List Allocations ---


[docs]def emap(func, iter_, **kwargs): """ Eager version of the builtin map function. This provides the same functionality as python2 map. Note this is inefficient and should only be used when prototyping and debugging. Extended functionality supports passing common kwargs to all functions """ return [func(arg, **kwargs) for arg in iter_]
# return list(map(func, iter_))
[docs]def estarmap(func, iter_, **kwargs): """ Eager version of it.starmap from itertools Note this is inefficient and should only be used when prototyping and debugging. """ return [func(*arg, **kwargs) for arg in iter_]
[docs]def ezip(*args): """ Eager version of the builtin zip function. This provides the same functionality as python2 zip. Note this is inefficient and should only be used when prototyping and debugging. """ return list(zip(*args))
lmap = emap lzip = ezip lstarmap = estarmap
[docs]def maplen(iter_): return list(map(len, iter_))
[docs]def rebase_labels(label_list): counter = itertools.count(0) orig_to_new = {} for label in sorted(label_list): if label not in orig_to_new: orig_to_new[label] = six.next(counter) import utool as ut rebased_labels = ut.take(orig_to_new, label_list) return rebased_labels
[docs]def replace_nones(list_, repl=-1): r""" Recursively removes Nones in all lists and sublists and replaces them with the repl variable Args: list_ (list): repl (obj): replacement value Returns: list CommandLine: python -m utool.util_list --test-replace_nones Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> # build test data >>> list_ = [None, 0, 1, 2] >>> repl = -1 >>> # execute function >>> repl_list = replace_nones(list_, repl) >>> # verify results >>> result = str(repl_list) >>> print(result) [-1, 0, 1, 2] """ repl_list = [ repl if item is None else (replace_nones(item, repl) if isinstance(item, list) else item) for item in list_ ] return repl_list
[docs]def recursive_replace(list_, target, repl=-1): r""" Recursively removes target in all lists and sublists and replaces them with the repl variable """ repl_list = [ recursive_replace(item, target, repl) if isinstance(item, (list, np.ndarray)) else (repl if item == target else item) for item in list_ ] return repl_list
[docs]def list_replace(list_, target, repl): r""" alias Recursively removes target in all lists and sublists and replaces them with the repl variable """ return recursive_replace(list_, target, repl)
[docs]def alloc_lists(num_alloc): """allocates space for a ``list`` of lists""" return [[] for _ in range(num_alloc)]
[docs]def alloc_nones(num_alloc): """allocates space for a ``list`` of Nones""" return [None] * num_alloc
# return [None for _ in range(num_alloc)]
[docs]def ensure_list_size(list_, size_): """Allocates more space if needbe. Ensures len(``list_``) == ``size_``. Args: list_ (list): ``list`` to extend size_ (int): amount to exent by """ lendiff = (size_) - len(list_) if lendiff > 0: extension = [None for _ in range(lendiff)] list_.extend(extension)
# --- List Searching --- #
[docs]def get_list_column_slice(list_, start=None, stop=None, stride=None): return list(util_iter.iget_list_column_slice(list_, start, stop, stride))
[docs]def take_column(list_, colx): r""" accepts a list of (indexables) and returns a list of indexables can also return a list of list of indexables if colx is a list Args: list_ (list): list of lists colx (int or list): index or key in each sublist get item Returns: list: list of selected items CommandLine: python -m utool.util_list --test-take_column Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [['a', 'b'], ['c', 'd']] >>> colx = 0 >>> result = take_column(list_, colx) >>> import utool as ut >>> result = ut.repr4(result, nl=False) >>> print(result) ['a', 'c'] Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [['a', 'b'], ['c', 'd']] >>> colx = [1, 0] >>> result = take_column(list_, colx) >>> import utool as ut >>> result = ut.repr4(result, nl=False) >>> print(result) [['b', 'a'], ['d', 'c']] Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [{'spam': 'EGGS', 'ham': 'SPAM'}, {'spam': 'JAM', 'ham': 'PRAM'}] >>> # colx can be a key or list of keys as well >>> colx = ['spam'] >>> result = take_column(list_, colx) >>> import utool as ut >>> result = ut.repr4(result, nl=False) >>> print(result) [['EGGS'], ['JAM']] """ # return list(util_iter.iget_list_column(list_, colx)) return list(util_iter.itake_column(list_, colx))
# if isinstance(colx, list): # # multi select # return [[row[colx_] for colx_ in colx] for row in list_] # else: # return [row[colx] for row in list_] get_list_column = take_column # def get_list_row(list_, rowx): # return list_[rowx]
[docs]def safeapply(func, *args, **kwargs): try: return func(*args, **kwargs) except Exception: return None
[docs]def safelen(list_): return safeapply(len, list_)
# return None if list_ is None else len(list_)
[docs]def safe_listget(list_, index, default='?'): """depricate""" if index >= len(list_): return default ret = list_[index] if ret is None: return default return ret
[docs]def listclip(list_, num, fromback=False): r""" DEPRICATE: use slices instead Args: list_ (list): num (int): Returns: sublist: CommandLine: python -m utool.util_list --test-listclip Example: >>> # ENABLE_DOCTEST >>> import utool as ut >>> # build test data >>> list_ = [1, 2, 3, 4, 5] >>> result_list = [] >>> # execute function >>> num = 3 >>> result_list += [ut.listclip(list_, num)] >>> num = 9 >>> result_list += [ut.listclip(list_, num)] >>> # verify results >>> result = ut.repr4(result_list) >>> print(result) [ [1, 2, 3], [1, 2, 3, 4, 5], ] Example: >>> # ENABLE_DOCTEST >>> import utool as ut >>> # build test data >>> list_ = [1, 2, 3, 4, 5] >>> result_list = [] >>> # execute function >>> num = 3 >>> result = ut.listclip(list_, num, fromback=True) >>> print(result) [3, 4, 5] """ if num is None: num_ = len(list_) else: num_ = min(len(list_), num) if fromback: sublist = list_[-num_:] else: sublist = list_[:num_] return sublist
[docs]def find_list_indexes(list_, items): """ Args: list_ (list): list of items to be searched items (list): list of items to find Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = ['a', 'b', 'c'] >>> items = ['d', 'c', 'b', 'f'] >>> index_list = find_list_indexes(list_, items) >>> result = ('index_list = %r' % (index_list,)) >>> print(result) index_list = [None, 2, 1, None] """ index_lookup = make_index_lookup(list_) index_list = [index_lookup.get(item) for item in items] # index_list = [listfind(list_, item) for item in items] return index_list
[docs]def listfind(list_, tofind): r""" get the position of item ``tofind`` in ``list_`` if it exists otherwise returns None Args: list_ (?): tofind (?): Returns: int: index of ``tofind`` in ``list_`` Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = ['a', 'b', 'c'] >>> tofind = 'b' >>> result = listfind(list_, tofind) >>> print(result) 1 """ try: return list_.index(tofind) except ValueError: return None
[docs]def search_list(text_list, pattern, flags=0): """ CommandLine: python -m utool.util_list --test-search_list Example: >>> # ENABLE_DOCTEST >>> import utool as ut >>> text_list = ['ham', 'jam', 'eggs', 'spam'] >>> pattern = '.am' >>> flags = 0 >>> (valid_index_list, valid_match_list) = ut.search_list(text_list, pattern, flags) >>> result = str(valid_index_list) >>> print(result) [0, 1, 3] """ import re import utool as ut match_list = [re.search(pattern, text, flags=flags) for text in text_list] valid_index_list = [ index for index, match in enumerate(match_list) if match is not None ] valid_match_list = ut.take(match_list, valid_index_list) return valid_index_list, valid_match_list
# --- List Modification --- #
[docs]def multi_replace(instr, search_list=[], repl_list=None): """ Does a string replace with a list of search and replacements TODO: rename """ repl_list = [''] * len(search_list) if repl_list is None else repl_list for ser, repl in zip(search_list, repl_list): instr = instr.replace(ser, repl) return instr
[docs]def flatten(list_): r""" Args: list_ (list): list of lists Returns: list: flat list CommandLine: python -m utool.util_list --test-flatten Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> list_ = [['a', 'b'], ['c', 'd']] >>> unflat_list2 = flatten(list_) >>> result = ut.repr4(unflat_list2, nl=False) >>> print(result) ['a', 'b', 'c', 'd'] """ return list(util_iter.iflatten(list_))
[docs]def invertible_flatten1(unflat_list): r""" Flattens `unflat_list` but remember how to reconstruct the `unflat_list` Returns `flat_list` and the `reverse_list` with indexes into the `flat_list` Args: unflat_list (list): list of nested lists that we will flatten. Returns: tuple : (flat_list, reverse_list) CommandLine: python -m utool.util_list --exec-invertible_flatten1 --show Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> unflat_list = [[1, 2, 3], [4, 5], [6, 6]] >>> flat_list, reverse_list = invertible_flatten1(unflat_list) >>> result = ('flat_list = %s\n' % (ut.repr2(flat_list),)) >>> result += ('reverse_list = %s' % (ut.repr2(reverse_list),)) >>> print(result) flat_list = [1, 2, 3, 4, 5, 6, 6] reverse_list = [[0, 1, 2], [3, 4], [5, 6]] """ nextnum = functools.partial(six.next, itertools.count(0)) # Build an unflat list of flat indexes reverse_list = [[nextnum() for _ in tup] for tup in unflat_list] flat_list = flatten(unflat_list) return flat_list, reverse_list
[docs]def unflatten1(flat_list, reverse_list): """Rebuilds unflat list from invertible_flatten1 Args: flat_list (list): the flattened list reverse_list (list): the list which undoes flattenting Returns: unflat_list2: original nested list SeeAlso: invertible_flatten1 invertible_flatten2 unflatten2 """ unflat_list2 = [[flat_list[index] for index in tup] for tup in reverse_list] return unflat_list2
[docs]def accumulate(iterator): """ Notice: use itertools.accumulate in python > 3.2 """ total = 0 for item in iterator: total += item yield total
[docs]def total_flatten(unflat_list): """ unflat_list = [1, 2, [3, 4], [5, [9]]] Args: unflat_list (list): Returns: list: flat_list CommandLine: python -m utool.util_list --exec-total_flatten --show Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> unflat_list = [[[1, 2, 3], 4, 5], 9, [2, 3], [1, [2, 3, 4]], 1, 2, 3] >>> flat_list = total_flatten(unflat_list) >>> result = ('flat_list = %s' % (ut.repr2(flat_list),)) >>> print(result) """ import utool as ut next_list = unflat_list scalar_flags = [not ut.isiterable(item) for item in next_list] while not all(scalar_flags): unflatenized = [ [item] if flag else item for flag, item in zip(scalar_flags, next_list) ] flatter_list = ut.flatten(unflatenized) next_list = flatter_list scalar_flags = [not ut.isiterable(item) for item in next_list] flat_list = next_list return flat_list
[docs]def invertible_total_flatten(unflat_list): r""" Args: unflat_list (list): Returns: tuple: (flat_list, invert_levels) CommandLine: python -m utool.util_list --exec-invertible_total_flatten --show Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> unflat_list = [0, [[1, 2, 3], 4, 5], 9, [2, 3], [1, [2, 3, 4]], 1, 2, 3] >>> print('unflat_list = %r' % (unflat_list,)) >>> (flat_list, invert_levels) = invertible_total_flatten(unflat_list) >>> print('flat_list = %r' % (flat_list,)) >>> unflat_list2 = total_unflatten(flat_list, invert_levels) >>> print('unflat_list2 = %r' % (unflat_list2,)) >>> assert unflat_list2 == unflat_list >>> assert ut.depth_profile(flat_list) == 16 """ import utool as ut next_list = unflat_list scalar_flags = [not ut.isiterable(item) for item in next_list] invert_stack = [] # print('unflat_list = %r' % (unflat_list,)) while not all(scalar_flags): unflattenized = [ [item] if flag else item for flag, item in zip(scalar_flags, next_list) ] flatter_list, invert_part = ut.invertible_flatten1(unflattenized) # print('flatter_list = %r' % (flatter_list,)) for idx in ut.where(scalar_flags): invert_part[idx] = invert_part[idx][0] invert_stack.append(invert_part) next_list = flatter_list scalar_flags = [not ut.isiterable(item) for item in next_list] # invert_part = [None] * len(scalar_flags) # invert_stack.append(invert_part) invert_levels = invert_stack[::-1] flat_list = next_list return flat_list, invert_levels
[docs]def total_unflatten(flat_list, invert_levels): import utool as ut less_flat_list = flat_list # print('less_flat_list = %r' % (less_flat_list,)) for lx, level in enumerate(invert_levels): needs_unflatten = [ut.isiterable(x) for x in level] is_alreadyflat = ut.not_list(needs_unflatten) needs_unflatxs = ut.where(needs_unflatten) already_flatxs = ut.where(is_alreadyflat) invertinfo = ut.compress(level, needs_unflatten) unflat_part = ut.unflatten1(less_flat_list, invertinfo) flat_sortx = ut.take(level, already_flatxs) flat_part = ut.take(less_flat_list, flat_sortx) maxsize = len(already_flatxs) + len(needs_unflatxs) - 1 # (len(invertinfo) + sum(is_alreadyflat) + 1) groups = [flat_part, unflat_part] groupxs = [already_flatxs, needs_unflatxs] less_flat_list_ = ut.ungroup(groups, groupxs, maxsize) less_flat_list = less_flat_list_ unflat_list = less_flat_list return unflat_list
[docs]def invertible_flatten2(unflat_list): """ An alternative to invertible_flatten1 which uses cumsum Flattens ``list`` but remember how to reconstruct the unflat ``list`` Returns flat ``list`` and the unflat ``list`` with indexes into the flat ``list`` Args: unflat_list (list): Returns: tuple: flat_list, cumlen_list SeeAlso: invertible_flatten1 unflatten1 unflatten2 Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool >>> utool.util_list >>> unflat_list = [[5], [2, 3, 12, 3, 3], [9], [13, 3], [5]] >>> flat_list, cumlen_list = invertible_flatten2(unflat_list) >>> unflat_list2 = unflatten2(flat_list, cumlen_list) >>> assert unflat_list2 == unflat_list >>> result = ((flat_list, cumlen_list)) >>> print(result) ([5, 2, 3, 12, 3, 3, 9, 13, 3, 5], [1, 6, 7, 9, 10]) TODO: This flatten is faster fix it to be used everywhere Timeit: unflat_list = [[random.random() for _ in range(int(random.random() * 1000))] for __ in range(200)] unflat_arrs = list(map(np.array, unflat_list)) %timeit invertible_flatten2(unflat_list) %timeit invertible_flatten2_numpy(unflat_list) %timeit invertible_flatten2_numpy(unflat_arrs) Timeits: import utool unflat_list = aids_list1 flat_aids1, reverse_list = utool.invertible_flatten1(unflat_list) flat_aids2, cumlen_list = utool.invertible_flatten2(unflat_list) unflat_list1 = utool.unflatten1(flat_aids1, reverse_list) unflat_list2 = utool.unflatten2(flat_aids2, cumlen_list) assert list(map(list, unflat_list1)) == unflat_list2 print(utool.get_object_size_str(unflat_list, 'unflat_list ')) print(utool.get_object_size_str(flat_aids1, 'flat_aids1 ')) print(utool.get_object_size_str(flat_aids2, 'flat_aids2 ')) print(utool.get_object_size_str(reverse_list, 'reverse_list ')) print(utool.get_object_size_str(cumlen_list, 'cumlen_list ')) print(utool.get_object_size_str(unflat_list1, 'unflat_list1 ')) print(utool.get_object_size_str(unflat_list2, 'unflat_list2 ')) print('Timings 1:) %timeit utool.invertible_flatten1(unflat_list) %timeit utool.unflatten1(flat_aids1, reverse_list) print('Timings 2:) %timeit utool.invertible_flatten2(unflat_list) %timeit utool.unflatten2(flat_aids2, cumlen_list) """ sublen_list = list(map(len, unflat_list)) if not util_type.HAVE_NUMPY: cumlen_list = np.cumsum(sublen_list) # Build an unflat list of flat indexes else: cumlen_list = list(accumulate(sublen_list)) flat_list = flatten(unflat_list) return flat_list, cumlen_list
[docs]def invertible_flatten2_numpy(unflat_arrs, axis=0): """more numpy version TODO: move to vtool Args: unflat_arrs (list): list of ndarrays Returns: tuple: (flat_list, cumlen_list) CommandLine: python -m utool.util_list --test-invertible_flatten2_numpy Ignore: >>> # ENABLE_DOCTET >>> from utool.util_list import * # NOQA >>> unflat_arrs = [np.array([1, 2, 1]), np.array([5, 9]), np.array([4])] >>> (flat_list, cumlen_list) = invertible_flatten2_numpy(unflat_arrs) >>> result = str((flat_list, cumlen_list)) >>> print(result) (array([1, 2, 1, 5, 9, 4]), array([3, 5, 6])) """ cumlen_list = np.cumsum([arr.shape[axis] for arr in unflat_arrs]) flat_list = np.concatenate(unflat_arrs, axis=axis) return flat_list, cumlen_list
[docs]def unflatten2(flat_list, cumlen_list): """Rebuilds unflat list from invertible_flatten1 Args: flat_list (list): the flattened list cumlen_list (list): the list which undoes flattenting Returns: unflat_list2: original nested list SeeAlso: invertible_flatten1 invertible_flatten2 unflatten2 Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool >>> utool.util_list >>> flat_list = [5, 2, 3, 12, 3, 3, 9, 13, 3, 5] >>> cumlen_list = [ 1, 6, 7, 9, 10] >>> unflat_list2 = unflatten2(flat_list, cumlen_list) >>> result = (unflat_list2) >>> print(result) [[5], [2, 3, 12, 3, 3], [9], [13, 3], [5]] """ unflat_list2 = [ flat_list[low:high] for low, high in zip(itertools.chain([0], cumlen_list), cumlen_list) ] return unflat_list2
[docs]def unflat_unique_rowid_map(func, unflat_rowids, **kwargs): """ performs only one call to the underlying func with unique rowids the func must be some lookup function TODO: move this to a better place. CommandLine: python -m utool.util_list --test-unflat_unique_rowid_map:0 python -m utool.util_list --test-unflat_unique_rowid_map:1 Ignore: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> kwargs = {} >>> unflat_rowids = [[1, 2, 3], [2, 5], [1], []] >>> num_calls0 = [0] >>> num_input0 = [0] >>> def func0(rowids, num_calls0=num_calls0, num_input0=num_input0): ... num_calls0[0] += 1 ... num_input0[0] += len(rowids) ... return [rowid + 10 for rowid in rowids] >>> func = func0 >>> unflat_vals = unflat_unique_rowid_map(func, unflat_rowids, **kwargs) >>> result = [arr.tolist() for arr in unflat_vals] >>> print(result) >>> ut.assert_eq(num_calls0[0], 1) >>> ut.assert_eq(num_input0[0], 4) [[11, 12, 13], [12, 15], [11], []] Ignore: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> import numpy as np >>> kwargs = {} >>> unflat_rowids = [[1, 2, 3], [2, 5], [1], []] >>> num_calls1 = [0] >>> num_input1 = [0] >>> def func1(rowids, num_calls1=num_calls1, num_input1=num_input1, np=np): ... num_calls1[0] += 1 ... num_input1[0] += len(rowids) ... return [np.array([rowid + 10, rowid, 3]) for rowid in rowids] >>> func = func1 >>> unflat_vals = unflat_unique_rowid_map(func, unflat_rowids, **kwargs) >>> result = [arr.tolist() for arr in unflat_vals] >>> print(result) >>> ut.assert_eq(num_calls1[0], 1) >>> ut.assert_eq(num_input1[0], 4) [[[11, 1, 3], [12, 2, 3], [13, 3, 3]], [[12, 2, 3], [15, 5, 3]], [[11, 1, 3]], []] """ import utool as ut # First flatten the list, and remember the original dimensions flat_rowids, reverse_list = ut.invertible_flatten2(unflat_rowids) # Then make the input unique flat_rowids_arr = np.array(flat_rowids) unique_flat_rowids, inverse_unique = np.unique(flat_rowids_arr, return_inverse=True) # Then preform the lookup / implicit mapping unique_flat_vals = func(unique_flat_rowids, **kwargs) # Then broadcast unique values back to original flat positions flat_vals_ = np.array(unique_flat_vals)[inverse_unique] # flat_vals_ = np.array(unique_flat_vals).take(inverse_unique, axis=0) output_shape = tuple(list(flat_rowids_arr.shape) + list(flat_vals_.shape[1:])) flat_vals = np.array(flat_vals_).reshape(output_shape) # Then _unflatten the results to the original input dimensions unflat_vals = ut.unflatten2(flat_vals, reverse_list) return unflat_vals
[docs]def unpack_iterables(list_): import utool as ut if ut.isiterable(list_): return [new_item for item in list_ for new_item in unpack_iterables(item)] else: return [list_]
[docs]def safe_slice(list_, *args): """safe_slice(list_, [start], stop, [end], [step]) Slices list and truncates if out of bounds """ if len(args) == 3: start = args[0] stop = args[1] step = args[2] else: step = 1 if len(args) == 2: start = args[0] stop = args[1] else: start = 0 stop = args[0] len_ = len(list_) if stop > len_: stop = len_ return list_[slice(start, stop, step)]
# --- List Queries --- #
[docs]def allsame(list_, strict=True): """ checks to see if list is equal everywhere Args: list_ (list): Returns: True if all items in the list are equal """ if len(list_) == 0: return True first_item = list_[0] return list_all_eq_to(list_, first_item, strict)
[docs]def list_all_eq_to(list_, val, strict=True): """ checks to see if list is equal everywhere to a value Args: list_ (list): val : value to check against Returns: True if all items in the list are equal to val """ if util_type.HAVE_NUMPY and isinstance(val, np.ndarray): return all([np.all(item == val) for item in list_]) try: # FUTURE WARNING # FutureWarning: comparison to `None` will result in an elementwise object comparison in the future. with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=FutureWarning) flags = [item == val for item in list_] return all( [np.all(flag) if hasattr(flag, '__array__') else flag for flag in flags] ) # return all([item == val for item in list_]) except ValueError: if not strict: return all([repr(item) == repr(val) for item in list_]) else: raise
[docs]def get_dirty_items(item_list, flag_list): """ Returns each item in item_list where not flag in flag_list Args: item_list (list): flag_list (list): Returns: dirty_items """ assert len(item_list) == len(flag_list) dirty_items = [item for (item, flag) in zip(item_list, flag_list) if not flag] # print('num_dirty_items = %r' % len(dirty_items)) # print('item_list = %r' % (item_list,)) # print('flag_list = %r' % (flag_list,)) return dirty_items
[docs]def compress(item_list, flag_list): """ like np.compress but for lists Returns items in item list where the corresponding item in flag list is True Args: item_list (list): list of items to mask flag_list (list): list of booleans used as a mask Returns: list : filtered_items - masked items """ assert len(item_list) == len( flag_list ), 'lists should correspond. len(item_list)=%r len(flag_list)=%r' % ( len(item_list), len(flag_list), ) filtered_items = list(util_iter.iter_compress(item_list, flag_list)) return filtered_items
[docs]def zipflat(*args): return [flatten(tup) for tup in zip(*args)]
[docs]def ziptake(items_list, indexes_list): """ SeeAlso: vt.ziptake """ return [ take(list_, index_list) for list_, index_list in zip(items_list, indexes_list) ]
[docs]def zipcompress(items_list, flags_list): """ SeeAlso: vt.zipcompress """ return [compress(list_, flags) for list_, flags in zip(items_list, flags_list)]
[docs]def list_zipflatten(*items_lists): return [flatten(items) for items in zip(*items_lists)]
[docs]def list_compresstake(items_list, flags_list): return [compress(list_, flags) for list_, flags in zip(items_list, flags_list)]
[docs]def filter_items(item_list, flag_list): """ Returns items in item list where the corresponding item in flag list is True Args: item_list (list): flag_list (list): Returns: filtered_items SeeAlso: util_iter.iter_compress """ return compress(item_list, flag_list)
[docs]def filterfalse_items(item_list, flag_list): """ Returns items in item list where the corresponding item in flag list is true Args: item_list (list): list of items flag_list (list): list of truthy values Returns: filtered_items : items where the corresponding flag was truthy SeeAlso: util_iter.ifilterfalse_items """ assert len(item_list) == len(flag_list) filtered_items = list(util_iter.ifilterfalse_items(item_list, flag_list)) return filtered_items
[docs]def filter_Nones(item_list): """ Removes any nones from the list Args: item_list (list): Returns: sublist which does not contain Nones """ return list(util_iter.ifilter_Nones(item_list))
# --- List combinations --- #
[docs]def isect(list1, list2): r""" returns list1 elements that are also in list2. preserves order of list1 intersect_ordered Args: list1 (list): list2 (list): Returns: list: new_list Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list1 = ['featweight_rowid', 'feature_rowid', 'config_rowid', 'featweight_forground_weight'] >>> list2 = [u'featweight_rowid'] >>> result = intersect_ordered(list1, list2) >>> print(result) ['featweight_rowid'] Timeit: >>> def timeit_func(func, *args): >>> niter = 10 >>> times = [] >>> for count in range(niter): >>> with ut.Timer(verbose=False) as t: >>> _ = func(*args) >>> times.append(t.ellapsed) >>> return sum(times) / niter >>> >>> grid = { >>> 'size1': [1000, 5000, 10000, 50000], >>> 'size2': [1000, 5000, 10000, 50000], >>> #'overlap': [0, 1], >>> } >>> data = [] >>> for kw in ut.all_dict_combinations(grid): >>> pool = np.arange(kw['size1'] * 2) >>> size2 = size1 = kw['size1'] >>> size2 = kw['size2'] >>> list1 = (np.random.rand(size1) * size1).astype(np.int32).tolist() >>> list1 = ut.random_sample(pool, size1).tolist() >>> list2 = ut.random_sample(pool, size2).tolist() >>> list1 = set(list1) >>> list2 = set(list2) >>> kw['ut'] = timeit_func(ut.isect, list1, list2) >>> #kw['np1'] = timeit_func(np.intersect1d, list1, list2) >>> #kw['py1'] = timeit_func(lambda a, b: set.intersection(set(a), set(b)), list1, list2) >>> kw['py2'] = timeit_func(lambda a, b: sorted(set.intersection(set(a), set(b))), list1, list2) >>> data.append(kw) >>> >>> import pandas as pd >>> pd.options.display.max_rows = 1000 >>> pd.options.display.width = 1000 >>> df = pd.DataFrame.from_dict(data) >>> data_keys = list(grid.keys()) >>> other_keys = ut.setdiff(df.columns, data_keys) >>> df = df.reindex(data_keys + other_keys, axis=1) >>> df['abs_change'] = df['ut'] - df['py2'] >>> df['pct_change'] = df['abs_change'] / df['ut'] * 100 >>> #print(df.sort('abs_change', ascending=False)) >>> >>> print(str(df).split('\n')[0]) >>> for row in df.values: >>> argmin = row[len(data_keys):len(data_keys) + len(other_keys)].argmin() + len(data_keys) >>> print(' ' + ', '.join([ >>> '%6d' % (r) if x < len(data_keys) else ( >>> ut.color_text('%8.6f' % (r,), 'blue') >>> if x == argmin else '%8.6f' % (r,)) >>> for x, r in enumerate(row) >>> ])) >>> >>> %timeit ut.isect(list1, list2) >>> %timeit np.intersect1d(list1, list2, assume_unique=True) >>> %timeit set.intersection(set(list1), set(list2)) >>> >>> #def highlight_max(s): >>> # ''' >>> # highlight the maximum in a Series yellow. >>> # ''' >>> # is_max = s == s.max() >>> # return ['background-color: yellow' if v else '' for v in is_max] >>> #df.style.apply(highlight_max) """ set2 = set(list2) return [item for item in list1 if item in set2]
[docs]def union_ordered(*lists): return unique_ordered(flatten(lists))
[docs]def union(*lists, **kwargs): r""" Ignore: >>> %timeit len(reduce(set.union, map(set, x))) >>> %timeit len(ut.union(*x)) >>> %timeit len(ut.unique(ut.flatten(ut.lmap(np.unique, x)))) >>> %timeit len(ut.unique(ut.flatten(x))) >>> %timeit len(ut.union(*x)) >>> %timeit len(ut.list\_union(*x)) >>> %timeit len(set.union(*[set(list\_) for list\_ in lists])) >>> %timeit len(set.union(*(set(list\_) for list\_ in lists))) """ if kwargs.get('ordered', True): return union_ordered(*lists) else: return list_union(*lists)
[docs]def list_intersection(list1, list2): return set(list1).intersection(set(list2))
[docs]def list_issubset(list1, list2): return set(list1).issubset(set(list2))
[docs]def list_issuperset(list1, list2): return set(list1).issuperset(set(list2))
[docs]def list_isdisjoint(list1, list2): return set(list1).isdisjoint(set(list2))
[docs]def list_union(*lists): return set.union(*(set(list_) for list_ in lists))
# return set.union(*[set(list_) for list_ in lists])
[docs]def isect_indices(items1, items2): set1_ = set(items1) set2_ = set(items2) items_isect = set1_.intersection(set2_) idxs1 = find_list_indexes(items1, items_isect) idxs2 = find_list_indexes(items2, items_isect) return idxs1, idxs2
intersect_ordered = isect is_subset = list_issubset is_superset = list_issuperset issubset = list_issubset issuperset = list_issuperset isdisjoint = list_isdisjoint
[docs]def list_set_equal(list1, list2): return set(list1) == set(list2)
[docs]def is_subset_of_any(set_, other_sets): r""" returns True if set\_ is a subset of any set in other_sets Args: set\_ (set): other_sets (list of sets): Returns: bool: flag CommandLine: python -m utool.util_list --test-is_subset_of_any Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> # build test data >>> set_ = {1, 2} >>> other_sets = [{1, 4}, {3, 2, 1}] >>> # execute function >>> result = is_subset_of_any(set_, other_sets) >>> # verify results >>> print(result) True Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> # build test data >>> set_ = {1, 2} >>> other_sets = [{1, 4}, {3, 2}] >>> # execute function >>> result = is_subset_of_any(set_, other_sets) >>> # verify results >>> print(result) False """ set_ = set(set_) other_sets = map(set, other_sets) return any([set_.issubset(other_set) for other_set in other_sets])
[docs]def priority_sort(list_, priority): r""" Args: list_ (list): priority (list): desired order of items Returns: list: reordered_list CommandLine: python -m utool.util_list --test-priority_argsort Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [2, 4, 6, 8, 10] >>> priority = [8, 2, 6, 9] >>> reordered_list = priority_sort(list_, priority) >>> result = str(reordered_list) >>> print(result) [8, 2, 6, 4, 10] """ # remove requested priority items not in the list priority_ = setintersect_ordered(priority, list_) reordered_list = unique_ordered(priority_ + list_) return reordered_list
[docs]def priority_argsort(list_, priority): r""" Args: list_ (list): priority (list): desired order of items Returns: list: reordered_index_list Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> list_ = [2, 4, 6, 8, 10] >>> priority = [8, 2, 6, 9] >>> sortx = priority_argsort(list_, priority) >>> reordered_list = priority_sort(list_, priority) >>> assert ut.take(list_, sortx) == reordered_list >>> result = str(sortx) >>> print(result) [3, 0, 2, 1, 4] """ reordered_list = priority_sort(list_, priority) # FIXME: inefficient sortx = [list_.index(item) for item in reordered_list] return sortx
[docs]def flag_unique_items(list_): r""" Returns a list of flags corresponding to the first time an item is seen Args: list\_ (list): list of items Returns: flag_list Timing: >>> import random >>> import utool as ut >>> >>> def random_items(n, m): >>> rng = random.Random(0) >>> return [rng.randint(0, n) for _ in range(m)] >>> >>> m = 1000 >>> >>> def method1(list\_): >>> seen = set() >>> def unseen(item): >>> if item in seen: >>> return False >>> seen.add(item) >>> return True >>> flag_list = [unseen(item) for item in list\_] >>> return flag_list >>> >>> def method2(list\_): >>> return ut.index_to_boolmask([list\_.index(x) for x in set(list\_)], len(list\_)) >>> >>> def method3(list\_): >>> return ut.index_to_boolmask(dict(zip(reversed(list\_), reversed(range(len(list\_))))).values(), len(list\_)) >>> >>> >>> import ubelt as ub >>> ub.Timerit.DEFAULT_VERBOSE = False >>> >>> ut.qtensure() >>> exps = [0, .25, .5, .75, 1, 2] >>> pnum = pt.make_pnum_nextgen(nSubplots=len(exps)) >>> current = ut.flag_unique_items >>> >>> for count, exp in ut.ProgIter(list(enumerate(exps, start=1))): >>> ydatas = ut.ddict(list) >>> xdata = [] >>> for m in ut.ProgIter(list(range(0, 10000, 100)), freq=1): >>> xdata.append(m) >>> num = 10 >>> n = int(m ** exp) >>> list\_ = random_items(n=n, m=m) >>> ydatas['method1'].append(ub.Timerit(num).call(method1, list\_)) >>> ydatas['method2'].append(ub.Timerit(num).call(method2, list\_)) >>> ydatas['method3'].append(ub.Timerit(num).call(method3, list\_)) >>> ydatas['current'].append(ub.Timerit(num).call(current, list\_)) >>> >>> # assert method1(list\_) == method3(list\_) >>> # assert method1(list\_) == current(list\_) >>> >>> pt.multi_plot( >>> xdata, list(ydatas.values()), label_list=list(ydatas.keys()), >>> ylabel='time', title=str(exp), fnum=1, pnum=pnum()) """ len_ = len(list_) item_to_index = dict(zip(reversed(list_), reversed(range(len_)))) flag_list = index_to_boolmask(item_to_index.values(), len_) return flag_list
unique_flags = flag_unique_items
[docs]def iflag_unique_items(list_): """ Returns a list of flags corresponding to the first time an item is seen Args: list_ (list): list of items Returns: flag_iter """ seen = set() def unseen(item): if item in seen: return False seen.add(item) return True flag_iter = (unseen(item) for item in list_) return flag_iter
[docs]def unique_ordered(list_): """ Returns unique items in ``list_`` in the order they were seen. Args: list_ (list): Returns: list: unique_list - unique list which maintains order CommandLine: python -m utool.util_list --exec-unique_ordered Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [4, 6, 6, 0, 6, 1, 0, 2, 2, 1] >>> unique_list = unique_ordered(list_) >>> result = ('unique_list = %s' % (str(unique_list),)) >>> print(result) unique_list = [4, 6, 0, 1, 2] """ list_ = list(list_) flag_list = flag_unique_items(list_) unique_list = compress(list_, flag_list) return unique_list
[docs]def unique_unordered(list_): r""" wrapper around list(set(list\_)) """ return list(set(list_))
[docs]def unique_indices(list_): return where(flag_unique_items(list_))
[docs]def unique(list_, ordered=True): """ Returns unique items in ``list_``. Generally, unordered (*should be*) faster. """ if ordered: return unique_ordered(list_) else: return unique_unordered(list_)
[docs]def flat_unique(*lists_, **kwargs): """returns items unique across all lists""" return unique(flatten(lists_), **kwargs)
[docs]def setdiff(list1, list2): """ returns list1 elements that are not in list2. preserves order of list1 Args: list1 (list): list2 (list): Returns: list: new_list Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> list1 = ['featweight_rowid', 'feature_rowid', 'config_rowid', 'featweight_forground_weight'] >>> list2 = [u'featweight_rowid'] >>> new_list = setdiff_ordered(list1, list2) >>> result = ut.repr4(new_list, nl=False) >>> print(result) ['feature_rowid', 'config_rowid', 'featweight_forground_weight'] """ set2 = set(list2) return [item for item in list1 if item not in set2]
[docs]def setdiff_flags(list1, list2): return list(isetdiff_flags(list1, list2))
[docs]def isetdiff_flags(list1, list2): """ move to util_iter """ set2 = set(list2) return (item not in set2 for item in list1)
setdiff_ordered = setdiff
[docs]def setintersect_ordered(list1, list2): """ returns list1 elements that are in list2. preserves order of list1 setintersect_ordered Args: list1 (list): list2 (list): Returns: list: new_list Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list1 = [1, 2, 3, 5, 8, 13, 21] >>> list2 = [6, 4, 2, 21, 8] >>> new_list = setintersect_ordered(list1, list2) >>> result = new_list >>> print(result) [2, 8, 21] """ return [item for item in list1 if item in set(list2)]
setintersect = setintersect_ordered
[docs]def sortedby(item_list, key_list, reverse=False): """sorts ``item_list`` using key_list Args: list_ (list): list to sort key_list (list): list to sort by reverse (bool): sort order is descending (largest first) if reverse is True else acscending (smallest first) Returns: list : ``list_`` sorted by the values of another ``list``. defaults to ascending order SeeAlso: sortedby2 Example: >>> # ENABLE_DOCTEST >>> import utool >>> list_ = [1, 2, 3, 4, 5] >>> key_list = [2, 5, 3, 1, 5] >>> result = utool.sortedby(list_, key_list, reverse=True) >>> print(result) [5, 2, 3, 1, 4] """ assert len(item_list) == len(key_list), 'Expected same len. Got: %r != %r' % ( len(item_list), len(key_list), ) sorted_list = [ item for (key, item) in sorted(list(zip(key_list, item_list)), reverse=reverse) ] return sorted_list
[docs]def sortedby2(item_list, *args, **kwargs): """sorts ``item_list`` using key_list Args: item_list (list): list to sort *args: multiple lists to sort by **kwargs: reverse (bool): sort order is descending if True else acscending Returns: list : ``list_`` sorted by the values of another ``list``. defaults to ascending order CommandLine: python -m utool.util_list --exec-sortedby2 --show Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> item_list = [1, 2, 3, 4, 5] >>> key_list1 = [1, 1, 2, 3, 4] >>> key_list2 = [2, 1, 4, 1, 1] >>> args = (key_list1, key_list2) >>> kwargs = dict(reverse=False) >>> result = ut.sortedby2(item_list, *args, **kwargs) >>> print(result) [2, 1, 3, 4, 5] Example: >>> # ENABLE_DOCTEST >>> # Python 3 Compatibility Test >>> import utool as ut >>> item_list = [1, 2, 3, 4, 5] >>> key_list1 = ['a', 'a', 2, 3, 4] >>> key_list2 = ['b', 'a', 4, 1, 1] >>> args = (key_list1, key_list2) >>> kwargs = dict(reverse=False) >>> result = ut.sortedby2(item_list, *args, **kwargs) >>> print(result) [3, 4, 5, 2, 1] """ assert all([len(item_list) == len_ for len_ in map(len, args)]) reverse = kwargs.get('reverse', False) key = operator.itemgetter(*range(1, len(args) + 1)) tup_list = list(zip(item_list, *args)) # print(tup_list) try: sorted_tups = sorted(tup_list, key=key, reverse=reverse) except TypeError: # Python 3 does not allow sorting mixed types def keyfunc(tup): return tuple(map(str, tup[1:])) sorted_tups = sorted(tup_list, key=keyfunc, reverse=reverse) sorted_list = [tup[0] for tup in sorted_tups] return sorted_list
[docs]def unflat_take(items_list, unflat_index_list): r""" Returns nested subset of items_list Args: items_list (list): unflat_index_list (list): nested list of indices CommandLine: python -m utool.util_list --exec-unflat_take SeeAlso: ut.take Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> items_list = [1, 2, 3, 4, 5] >>> unflat_index_list = [[0, 1], [2, 3], [0, 4]] >>> result = unflat_take(items_list, unflat_index_list) >>> print(result) [[1, 2], [3, 4], [1, 5]] """ return [ unflat_take(items_list, xs) if isinstance(xs, list) else take(items_list, xs) for xs in unflat_index_list ]
[docs]def argsort(*args, **kwargs): """ like np.argsort but for lists Args: *args: multiple lists to sort by **kwargs: reverse (bool): sort order is descending if True else acscending CommandLine: python -m utool.util_list argsort Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> result = ut.argsort({'a': 3, 'b': 2, 'c': 100}) >>> print(result) """ if len(args) == 1 and isinstance(args[0], dict): dict_ = args[0] index_list = list(dict_.keys()) value_list = list(dict_.values()) return sortedby2(index_list, value_list) else: index_list = list(range(len(args[0]))) return sortedby2(index_list, *args, **kwargs)
[docs]def argsort2(indexable, key=None, reverse=False): """ Returns the indices that would sort a indexable object. This is similar to np.argsort, but it is written in pure python and works on both lists and dictionaries. Args: indexable (list or dict): indexable to sort by Returns: list: indices: list of indices such that sorts the indexable Example: >>> # DISABLE_DOCTEST >>> import utool as ut >>> # argsort works on dicts >>> dict_ = indexable = {'a': 3, 'b': 2, 'c': 100} >>> indices = ut.argsort2(indexable) >>> assert list(ut.take(dict_, indices)) == sorted(dict_.values()) >>> # argsort works on lists >>> indexable = [100, 2, 432, 10] >>> indices = ut.argsort2(indexable) >>> assert list(ut.take(indexable, indices)) == sorted(indexable) >>> # argsort works on iterators >>> indexable = reversed(range(100)) >>> indices = ut.argsort2(indexable) >>> assert indices[0] == 99 """ # Create an iterator of value/key pairs if isinstance(indexable, dict): vk_iter = ((v, k) for k, v in indexable.items()) else: vk_iter = ((v, k) for k, v in enumerate(indexable)) # Sort by values and extract the keys if key is None: indices = [k for v, k in sorted(vk_iter, reverse=reverse)] else: indices = [ k for v, k in sorted(vk_iter, key=lambda vk: key(vk[0]), reverse=reverse) ] return indices
[docs]def argmax(input_, multi=False): r""" Returns index / key of the item with the largest value. Args: input\_ (dict or list): References: http://stackoverflow.com/questions/16945518/python-argmin-argmax Ignore: list\_ = np.random.rand(10000).tolist() %timeit list\_.index(max(list\_)) %timeit max(enumerate(list\_), key=operator.itemgetter(1))[0] %timeit max(enumerate(list\_), key=lambda x: x[1])[0] %timeit max(range(len(list\_)), key=list\_.__getitem__) input\_ = dict\_ list\_ = np.random.rand(100000).tolist() dict\_ = {str(ut.random_uuid()): x for x in list\_} %timeit list(input\_.keys())[ut.argmax(list(input\_.values()))] %timeit max(input\_.items(), key=operator.itemgetter(1))[0] Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * >>> import utool as ut >>> input_ = [1, 2, 3, 3, 2, 3, 2, 1] >>> ut.argmax(input_, multi=True) >>> input_ = {1: 4, 2: 2, 3: 3, 3: 4} >>> ut.argmax(input_, multi=True) """ if multi: if isinstance(input_, dict): keys = list(input_.keys()) values = list(input_.values()) return [keys[idx] for idx in argmax(values, multi=multi)] else: return where(equal([max(input_)], input_)) else: if isinstance(input_, dict): # its crazy, but this is faster # max(input_.items(), key=operator.itemgetter(1))[0] return list(input_.keys())[argmax(list(input_.values()))] elif hasattr(input_, 'index'): return input_.index(max(input_)) else: return max(enumerate(input_), key=operator.itemgetter(1))[0]
[docs]def argmin(input_, key=None): """ Returns index / key of the item with the smallest value. Args: input_ (dict or list): Note: a[argmin(a, key=key)] == min(a, key=key) """ # if isinstance(input_, dict): # return list(input_.keys())[argmin(list(input_.values()))] # elif hasattr(input_, 'index'): # return input_.index(min(input_)) # else: # return min(enumerate(input_), key=operator.itemgetter(1))[0] if isinstance(input, dict): return list(input.keys())[argmin(list(input.values()), key=key)] else: if key is None: def _key(item): return item[1] else: def _key(item): return key(item[1]) return min(enumerate(input), key=_key)[0]
[docs]def index_complement(index_list, len_=None): """ Returns the other indicies in a list of length ``len_`` """ mask1 = index_to_boolmask(index_list, len_) mask2 = not_list(mask1) index_list_bar = list_where(mask2) return index_list_bar
[docs]def take_complement(list_, index_list): """Returns items in ``list_`` not indexed by index_list""" mask = not_list(index_to_boolmask(index_list, len(list_))) return compress(list_, mask)
[docs]def none_take(list_, index_list): """ Like take but indices can be None SeeAlso: ut.take """ return [None if index is None else list_[index] for index in index_list]
[docs]def take(list_, index_list): """ Selects a subset of a list based on a list of indices. This is similar to np.take, but pure python. Args: list_ (list): some indexable object index_list (list, slice, int): some indexing object Returns: list or scalar: subset of the list CommandLine: python -m utool.util_list --test-take SeeAlso: ut.dict_take ut.dict_subset ut.none_take ut.compress Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [0, 1, 2, 3] >>> index_list = [2, 0] >>> result = take(list_, index_list) >>> print(result) [2, 0] Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [0, 1, 2, 3] >>> index = 2 >>> result = take(list_, index) >>> print(result) 2 Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [0, 1, 2, 3] >>> index = slice(1, None, 2) >>> result = take(list_, index) >>> print(result) [1, 3] """ try: return [list_[index] for index in index_list] except TypeError: return list_[index_list]
# if util_iter.isiterable(index_list): # else: # def take # def take2(item_list, indicies, axis): # def _get_axes(list_, axis); # pass
[docs]def take_percentile(arr, percent): """take the top `percent` items in a list rounding up""" size = len(arr) stop = min(int(size * percent), len(arr)) return arr[0:stop]
[docs]def snapped_slice(size, frac, n): r""" Creates a slice spanning `n` items in a list of length `size` at position `frac`. Args: size (int): length of the list frac (float): position in the range [0, 1] n (int): number of items in the slice Returns: slice: slice object that best fits the criteria SeeAlso: take_percentile_parts Example: Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> print(snapped_slice(0, 0, 10)) >>> print(snapped_slice(1, 0, 10)) >>> print(snapped_slice(100, 0, 10)) >>> print(snapped_slice(9, 0, 10)) >>> print(snapped_slice(100, 1, 10)) pass """ if size < n: n = size start = int(size * frac - ceil(n / 2)) + 1 stop = int(size * frac + floor(n / 2)) + 1 # slide to the front or the back buf = 0 if stop >= size: buf = size - stop elif start < 0: buf = 0 - start stop += buf start += buf assert stop <= size, 'out of bounds [%r, %r]' % (stop, start) sl = slice(start, stop) return sl
[docs]def take_around_percentile(arr, frac, n): return arr[snapped_slice(len(arr), frac, n)]
[docs]def flag_percentile_parts(arr, front=None, mid=None, back=None): slices = [] if front: slices += [snapped_slice(len(arr), 0.0, front)] if mid: slices += [snapped_slice(len(arr), 0.5, mid)] if back: slices += [snapped_slice(len(arr), 1.0, back)] import itertools as it indices = sorted( set(it.chain.from_iterable(range(*sl.indices(len(arr))) for sl in slices)) ) flags = index_to_boolmask(indices, len(arr)) return flags
[docs]def take_percentile_parts(arr, front=None, mid=None, back=None): r""" Take parts from front, back, or middle of a list Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> arr = list(range(20)) >>> front = 3 >>> mid = 3 >>> back = 3 >>> result = take_percentile_parts(arr, front, mid, back) >>> print(result) [0, 1, 2, 9, 10, 11, 17, 18, 19] """ slices = [] if front: slices += [snapped_slice(len(arr), 0.0, front)] if mid: slices += [snapped_slice(len(arr), 0.5, mid)] if back: slices += [snapped_slice(len(arr), 1.0, back)] parts = flatten([arr[sl] for sl in slices]) return parts
[docs]def list_inverse_take(list_, index_list): r""" Args: list\_ (list): list in sorted domain index_list (list): index list of the unsorted domain Note: Seems to be logically equivalent to ut.take(list\_, ut.argsort(index_list)), but faster Returns: list: output_list\_ - the input list in the unsorted domain CommandLine: python -m utool.util_list --test-list_inverse_take Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> # build test data >>> rank_list = [3, 2, 4, 1, 9, 2] >>> prop_list = [0, 1, 2, 3, 4, 5] >>> index_list = ut.argsort(rank_list) >>> sorted_prop_list = ut.take(prop_list, index_list) >>> # execute function >>> list_ = sorted_prop_list >>> output_list_ = list_inverse_take(list_, index_list) >>> output_list2_ = ut.take(list_, ut.argsort(index_list)) >>> assert output_list_ == prop_list >>> assert output_list2_ == prop_list >>> # verify results >>> result = str(output_list_) >>> print(result) Timeit: %timeit list_inverse_take(list|_, index_list) %timeit ut.take(list|_, ut.argsort(index_list)) """ output_list_ = [None] * len(index_list) for item, index in zip(list_, index_list): output_list_[index] = item return output_list_
[docs]def broadcast_zip(list1, list2): r""" Zips elementwise pairs between list1 and list2. Broadcasts the first dimension if a single list is of length 1. Aliased as bzip Args: list1 (list): list2 (list): Returns: list: list of pairs SeeAlso: util_dict.dzip Raises: ValueError: if the list dimensions are not broadcastable Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> assert list(bzip([1, 2, 3], [4])) == [(1, 4), (2, 4), (3, 4)] >>> assert list(bzip([1, 2, 3], [4, 4, 4])) == [(1, 4), (2, 4), (3, 4)] >>> assert list(bzip([1], [4, 4, 4])) == [(1, 4), (1, 4), (1, 4)] >>> ut.assert_raises(ValueError, bzip, [1, 2, 3], []) >>> ut.assert_raises(ValueError, bzip, [], [4, 5, 6]) >>> ut.assert_raises(ValueError, bzip, [], [4]) >>> ut.assert_raises(ValueError, bzip, [1, 2], [4, 5, 6]) >>> ut.assert_raises(ValueError, bzip, [1, 2, 3], [4, 5]) """ try: len(list1) except TypeError: list1 = list(list1) try: len(list2) except TypeError: list2 = list(list2) # if len(list1) == 0 or len(list2) == 0: # # Corner case where either list is empty # return [] if len(list1) == 1 and len(list2) > 1: list1 = list1 * len(list2) elif len(list1) > 1 and len(list2) == 1: list2 = list2 * len(list1) elif len(list1) != len(list2): raise ValueError( 'out of alignment len(list1)=%r, len(list2)=%r' % (len(list1), len(list2)) ) # return list(zip(list1, list2)) return zip(list1, list2)
[docs]def where(flag_list): """takes flags returns indexes of True values""" return [index for index, flag in enumerate(flag_list) if flag]
[docs]def equal(list1, list2): """takes flags returns indexes of True values""" return [item1 == item2 for item1, item2 in broadcast_zip(list1, list2)]
[docs]def where_not_None(item_list): """returns list of indexes of non None values SeeAlso: flag_None_items """ return [index for index, item in enumerate(item_list) if item is not None]
[docs]def flag_None_items(list_): return [item is None for item in list_]
[docs]def flag_not_None_items(list_): return [item is not None for item in list_]
[docs]def scalar_input_map(func, input_): """ Map like function Args: func: function to apply input_ : either an iterable or scalar value Returns: If ``input_`` is iterable this function behaves like map otherwise applies func to ``input_`` """ if util_iter.isiterable(input_): return list(map(func, input_)) else: return func(input_)
[docs]def partial_imap_1to1(func, si_func): """a bit messy DEPRICATE """ @functools.wraps(si_func) def wrapper(input_): if not util_iter.isiterable(input_): return func(si_func(input_)) else: return list(map(func, si_func(input_))) set_funcname( wrapper, util_str.get_callable_name(func) + '_mapper_' + get_funcname(si_func) ) return wrapper
[docs]def sample_zip(items_list, num_samples, allow_overflow=False, per_bin=1): """Helper for sampling Given a list of lists, samples one item for each list and bins them into num_samples bins. If all sublists are of equal size this is equivilent to a zip, but otherewise consecutive bins will have monotonically less elemements # Doctest doesn't work with assertionerror #util_list.sample_zip(items_list, 2) #... #AssertionError: Overflow occured Args: items_list (list): num_samples (?): allow_overflow (bool): per_bin (int): Returns: tuple : (samples_list, overflow_samples) Example: >>> # DISABLE_DOCTEST >>> from utool import util_list >>> items_list = [[1, 2, 3, 4, 0], [5, 6, 7], [], [8, 9], [10]] >>> util_list.sample_zip(items_list, 5) ... [[1, 5, 8, 10], [2, 6, 9], [3, 7], [4], [0]] >>> util_list.sample_zip(items_list, 2, allow_overflow=True) ... ([[1, 5, 8, 10], [2, 6, 9]], [3, 7, 4]) >>> util_list.sample_zip(items_list, 4, allow_overflow=True, per_bin=2) ... ([[1, 5, 8, 10, 2, 6, 9], [3, 7, 4], [], []], [0]) """ # Prealloc a list of lists samples_list = [[] for _ in range(num_samples)] # Sample the ix-th value from every list samples_iter = zip_longest(*items_list) sx = 0 for ix, samples_ in zip(range(num_samples), samples_iter): samples = filter_Nones(samples_) samples_list[sx].extend(samples) # Put per_bin from each sublist into a sample if (ix + 1) % per_bin == 0: sx += 1 # Check for overflow if allow_overflow: overflow_samples = flatten([filter_Nones(samples_) for samples_ in samples_iter]) return samples_list, overflow_samples else: try: samples_iter.next() except StopIteration: pass else: raise AssertionError('Overflow occured') return samples_list
[docs]def sample_lists(items_list, num=1, seed=None): r""" Args: items_list (list): num (int): (default = 1) seed (None): (default = None) Returns: list: samples_list CommandLine: python -m utool.util_list --exec-sample_lists Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> items_list = [[], [1, 2, 3], [4], [5, 6], [7, 8, 9, 10]] >>> num = 2 >>> seed = 0 >>> samples_list = sample_lists(items_list, num, seed) >>> result = ('samples_list = %s' % (str(samples_list),)) >>> print(result) samples_list = [[], [3, 2], [4], [5, 6], [10, 9]] """ if seed is not None: rng = np.random.RandomState(seed) else: rng = np.random def random_choice(items, num): size = min(len(items), num) return rng.choice(items, size, replace=False).tolist() samples_list = [ random_choice(items, num) if len(items) > 0 else [] for items in items_list ] return samples_list
[docs]def strided_sample(items, num, offset=0): r""" Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> # build test data >>> items = [1, 2, 3, 4, 5] >>> num = 3 >>> offset = 0 >>> # execute function >>> sample_items = strided_sample(items, num, offset) >>> # verify results >>> result = str(sample_items) >>> print(result) """ import math stride = max(int(math.ceil(len(items) / num)), 1) sample_items = items[offset::stride] return sample_items
[docs]def issorted(list_, op=operator.le): """ Determines if a list is sorted Args: list_ (list): op (func): sorted operation (default=operator.le) Returns: bool : True if the list is sorted """ return all(op(list_[ix], list_[ix + 1]) for ix in range(len(list_) - 1))
[docs]def find_nonconsec_values(values, min_=None, max_=None): """ Determines if a list of values is consecutive (ascending) Args: values (list): list of values, sorted and unique min_(int): minimum value in range defaults min(values) max_(int): maximum value in range defaults max(values) Returns: missing_values: missing values that would make the list consecutive CommandLine: python -m utool.util_list --test-find_nonconsec_values Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import numpy as np >>> values = np.array([-2, 1, 2, 10]) >>> result = find_nonconsec_values(values) >>> print(result) [-1, 0, 3, 4, 5, 6, 7, 8, 9] """ # values = sorted(set(values)) if min_ is None: min_ = values[0] if max_ is None: max_ = values[-1] valx = 0 missing_values = [] for check in range(min_, max_ + 1): if values[valx] != check: missing_values.append(check) else: valx += 1 return missing_values
[docs]def group_consecutives(data, stepsize=1): """ Return list of consecutive lists of numbers from data (number list). References: http://stackoverflow.com/questions/7352684/how-to-find-the-groups-of-consecutive-elements-from-an-array-in-numpy """ run = [] result = [run] expect = None for item in data: if (item == expect) or (expect is None): run.append(item) else: run = [item] result.append(run) expect = item + stepsize return result
[docs]def group_consecutives_numpy(data, stepsize=1): """ Args: data (?): stepsize (int): Returns: list: list of ndarrays References: http://stackoverflow.com/questions/7352684/how-to-find-the-groups-of-consecutive-elements-from-an-array-in-numpy CommandLine: python -m utool.util_list --test-group_consecutives Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> # build test data >>> data = np.array([ 0, 1, 2, 3, 4, 320, 636, 637, 638, 639]) >>> stepsize = 1 >>> # execute function >>> result = group_consecutives(data, stepsize) >>> # verify results >>> print(result) [array([0, 1, 2, 3, 4]), array([320]), array([636, 637, 638, 639])] Timeit: %timeit group_consecutives_numpy(data, stepsize) # 14.8 µs per loop %timeit group_consecutives(data, stepsize) # 4.47 µs per loop """ return np.split(data, np.where(np.diff(data) != stepsize)[0] + 1)
[docs]def debug_consec_list(list_): """ Returns: tuple of (missing_items, missing_indices, duplicate_items) """ if not issorted(list_): print('warning list is not sorted. indices will not match') sortedlist = sorted(list_) start = sortedlist[0] last = start - 1 missing_vals = [] missing_indices = [] duplicate_items = [] for count, item in enumerate(sortedlist): diff = item - last if diff > 1: missing_indices.append(count) for miss in range(last + 1, last + diff): missing_vals.append(miss) elif diff == 0: duplicate_items.append(item) elif diff == 1: # Expected case pass else: raise AssertionError('We sorted the list. diff can not be negative') last = item return missing_vals, missing_indices, duplicate_items
[docs]def find_duplicate_items(items, k=2): r""" Args: items (list): Returns: dict: duplicate_map of indexes CommandLine: python -m utool.util_list --test-find_duplicate_items Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> items = [0, 1, 2, 3, 3, 0, 12, 2, 9] >>> duplicate_map = find_duplicate_items(items) >>> result = str(duplicate_map) >>> print(result) """ import utool as ut # Build item histogram duplicate_map = ut.ddict(list) for count, item in enumerate(items): duplicate_map[item].append(count) # remove singleton items singleton_keys = [] for key in six.iterkeys(duplicate_map): if len(duplicate_map[key]) == 1: singleton_keys.append(key) for key in singleton_keys: del duplicate_map[key] duplicate_map = dict(duplicate_map) return duplicate_map
# get_non_consecutive_positions = debug_consec_list
[docs]def duplicates_exist(items): """returns if list has duplicates""" return len(items) - len(set(items)) != 0
[docs]def isunique(items): return not duplicates_exist(items)
[docs]def debug_duplicate_items(items, *args, **kwargs): import utool as ut pad_stdout = kwargs.get('pad_stdout', True) if pad_stdout: print('') varname = ut.get_varname_from_locals(items, ut.get_parent_frame().f_locals) print('[util_list] +--- DEBUG DUPLICATE ITEMS %r ---' % (varname,)) def printfn(msg): print('[util_list] |' + msg) # with ut.Indenter('[util_list] | '): duplicate_map = ut.find_duplicate_items(items) printkw = {'printfn': printfn} ut.print_duplicate_map(duplicate_map, *args, **printkw) print('[util_list] L--- FINISH DEBUG DUPLICATE ITEMS ---') if pad_stdout: print('') return duplicate_map
[docs]def list_depth(list_, func=max, _depth=0): """ Returns the deepest level of nesting within a list of lists Args: list_ : a nested listlike object func : depth aggregation strategy (defaults to max) _depth : internal var Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [[[[[1]]], [3]], [[1], [3]], [[1], [3]]] >>> result = (list_depth(list_, _depth=0)) >>> print(result) """ depth_list = [ list_depth(item, func=func, _depth=_depth + 1) for item in list_ if util_type.is_listlike(item) ] if len(depth_list) > 0: return func(depth_list) else: return _depth
[docs]def depth(sequence, func=max, _depth=0): """ Find the nesting depth of a nested sequence """ if isinstance(sequence, dict): sequence = list(sequence.values()) depth_list = [ depth(item, func=func, _depth=_depth + 1) for item in sequence if (isinstance(item, dict) or util_type.is_listlike(item)) ] if len(depth_list) > 0: return func(depth_list) else: return _depth
[docs]def list_deep_types(list_): """ Returns all types in a deep list """ type_list = [] for item in list_: if util_type.is_listlike(item): type_list.extend(list_deep_types(item)) else: type_list.append(type(item)) return type_list
[docs]def depth_profile( list_, max_depth=None, compress_homogenous=True, compress_consecutive=False, new_depth=False, ): r""" Returns a nested list corresponding the shape of the nested structures lists represent depth, tuples represent shape. The values of the items do not matter. only the lengths. Args: list_ (list): max_depth (None): compress_homogenous (bool): compress_consecutive (bool): experimental CommandLine: python -m utool.util_list --test-depth_profile Setup: >>> from utool.util_list import * # NOQA Example: >>> # ENABLE_DOCTEST >>> list_ = [[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]], [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]] >>> result = depth_profile(list_) >>> print(result) (2, 3, 4) Example: >>> # ENABLE_DOCTEST >>> list_ = [[[[[1]]], [3, 4, 33]], [[1], [2, 3], [4, [5, 5]]], [1, 3]] >>> result = depth_profile(list_) >>> print(result) [[(1, 1, 1), 3], [1, 2, [1, 2]], 2] Example: >>> # ENABLE_DOCTEST >>> list_ = [[[[[1]]], [3, 4, 33]], [[1], [2, 3], [4, [5, 5]]], [1, 3]] >>> result = depth_profile(list_, max_depth=1) >>> print(result) [[(1, '1'), 3], [1, 2, [1, '2']], 2] Example: >>> # ENABLE_DOCTEST >>> list_ = [[[1, 2], [1, 2, 3]], None] >>> result = depth_profile(list_, compress_homogenous=True) >>> print(result) [[2, 3], 1] Example: >>> # ENABLE_DOCTEST >>> list_ = [[3, 2], [3, 2], [3, 2], [3, 2], [3, 2], [3, 2], [9, 5, 3], [2, 2]] >>> result = depth_profile(list_, compress_homogenous=True, compress_consecutive=True) >>> print(result) [2] * 6 + [3, 2] Example: >>> # ENABLE_DOCTEST >>> list_ = [[[3, 9], 2], [[3, 9], 2], [[3, 9], 2], [[3, 9], 2]] #, [3, 2], [3, 2]] >>> result = depth_profile(list_, compress_homogenous=True, compress_consecutive=True) >>> print(result) (4, [2, 1]) Example: >>> # ENABLE_DOCTEST >>> list_ = [[[[1, 2]], [1, 2]], [[[1, 2]], [1, 2]], [[[0, 2]], [1]]] >>> result1 = depth_profile(list_, compress_homogenous=True, compress_consecutive=False) >>> result2 = depth_profile(list_, compress_homogenous=True, compress_consecutive=True) >>> result = str(result1) + '\n' + str(result2) >>> print(result) [[(1, 2), 2], [(1, 2), 2], [(1, 2), 1]] [[(1, 2), 2]] * 2 + [[(1, 2), 1]] Example: >>> # ENABLE_DOCTEST >>> list_ = [[{'a': [1, 2], 'b': [3, 4, 5]}, [1, 2, 3]], None] >>> result = depth_profile(list_, compress_homogenous=True) >>> print(result) Example: >>> # ENABLE_DOCTEST >>> list_ = [[[1]], [[[1, 1], [1, 1]]], [[[[1, 3], 1], [[1, 3, 3], 1, 1]]]] >>> result = depth_profile(list_, compress_homogenous=True) >>> print(result) Example: >>> # ENABLE_DOCTEST >>> list_ = [] >>> result = depth_profile(list_) >>> print(result) # THIS IS AN ERROR??? SHOULD BE #[1, 1], [1, 2, 2], (1, ([1, 2]), ( Example: >>> # ENABLE_DOCTEST >>> fm1 = [[0, 0], [0, 0]] >>> fm2 = [[0, 0], [0, 0], [0, 0]] >>> fm3 = [[0, 0], [0, 0], [0, 0], [0, 0]] >>> list_ = [0, 0, 0] >>> list_ = [fm1, fm2, fm3] >>> max_depth = 0 >>> new_depth = True >>> result = depth_profile(list_, max_depth=max_depth, new_depth=new_depth) >>> print(result) """ if isinstance(list_, dict): list_ = list(list_.values()) # handle dict level_shape_list = [] # For a pure bottom level list return the length if not any(map(util_type.is_listlike, list_)): return len(list_) if False and new_depth: pass # max_depth_ = None if max_depth is None else max_depth - 1 # if max_depth_ is None or max_depth_ > 0: # pass # else: # for item in list_: # if isinstance(item, dict): # item = list(item.values()) # handle dict # if util_type.is_listlike(item): # if max_depth is None: # level_shape_list.append(depth_profile(item, None)) # else: # if max_depth >= 0: # level_shape_list.append(depth_profile(item, max_depth - 1)) # else: # level_shape_list.append(str(len(item))) # else: # level_shape_list.append(1) else: for item in list_: if isinstance(item, dict): item = list(item.values()) # handle dict if util_type.is_listlike(item): if max_depth is None: level_shape_list.append(depth_profile(item, None)) else: if max_depth >= 0: level_shape_list.append(depth_profile(item, max_depth - 1)) else: level_shape_list.append(str(len(item))) else: level_shape_list.append(1) if compress_homogenous: # removes redudant information by returning a shape duple if allsame(level_shape_list): dim_ = level_shape_list[0] len_ = len(level_shape_list) if isinstance(dim_, tuple): level_shape_list = tuple([len_] + list(dim_)) else: level_shape_list = tuple([len_, dim_]) if compress_consecutive: hash_list = list(map(hash, map(str, level_shape_list))) consec_list = group_consecutives(hash_list, 0) if len(consec_list) != len(level_shape_list): len_list = list(map(len, consec_list)) cumsum_list = np.cumsum(len_list) consec_str = '[' thresh = 1 for len_, cumsum in zip(len_list, cumsum_list): value = level_shape_list[cumsum - 1] if len_ > thresh: consec_str += str(value) + '] * ' + str(len_) consec_str += ' + [' else: consec_str += str(value) + ', ' if consec_str.endswith(', '): consec_str = consec_str[:-2] # consec_str += ']' # consec_str = consec_str.rstrip(', ').rstrip(']') # consec_str = consec_str.rstrip(', ') # if consec_str.endswith(']'): # consec_str = consec_str[:-1] consec_str += ']' level_shape_list = consec_str return level_shape_list
[docs]def list_type(list_): types = unique_ordered(list(map(type, list_))) if len(types) == 1: return types[0] else: return types
[docs]def list_type_profile(sequence, compress_homogenous=True, with_dtype=True): """ similar to depth_profile but reports types Args: sequence (?): compress_homogenous (bool): (default = True) Returns: str: level_type_str CommandLine: python -m utool.util_list --test-list_type_profile python3 -m utool.util_list --test-list_type_profile Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import numpy as np >>> sequence = [[1, 2], np.array([1, 2, 3], dtype=np.int32), (np.array([1, 2, 3], dtype=np.int32),)] >>> compress_homogenous = True >>> level_type_str = list_type_profile(sequence, compress_homogenous) >>> result = ('level_type_str = %s' % (str(level_type_str),)) >>> print(result) level_type_str = list(list(int*2), ndarray[int32], tuple(ndarray[int32]*1)) """ # For a pure bottom level list return the length # if not any(map(util_type.is_listlike, sequence)) or (isinstance(sequence, np.ndarray) and sequence.dtype != object): if not util_type.is_listlike(sequence) or ( isinstance(sequence, np.ndarray) and sequence.dtype != object ): typename = str(type(sequence)).replace("<type '", '').replace("'>", '') if six.PY3: typename = str(type(sequence)).replace("<class '", '').replace("'>", '') if with_dtype and typename == 'numpy.ndarray': typename = typename.replace('numpy.', '') typename += '[%s]' % (sequence.dtype,) level_type_str = typename return level_type_str if len(sequence) == 0: return '' level_type_list = [] for item in sequence: # if util_type.is_listlike(item): item_type_profile = list_type_profile(item, with_dtype=with_dtype) level_type_list.append(item_type_profile) if compress_homogenous: # removes redudant information by returning a type and number if allsame(level_type_list): type_ = level_type_list[0] level_type_str = str(type_) + '*' + str(len(level_type_list)) else: level_type_str = ', '.join(level_type_list) typename = str(type(sequence)).replace("<type '", '').replace("'>", '') if six.PY3: typename = str(type(sequence)).replace("<class '", '').replace("'>", '') level_type_str = typename + '(' + str(level_type_str) + ')' return level_type_str
[docs]def type_profile2(sequence, TypedSequence=None): """ similar to depth_profile but reports types Args: sequence (?): compress_homogenous (bool): (default = True) Returns: str: level_type_str CommandLine: python -m utool.util_list --exec-type_profile2 Example: >>> # DISABLE_DOCTEST >>> sequence = [] >>> from utool.util_list import * # NOQA >>> self = typeprof = type_profile2(sequence, type_sequence_factory()) >>> result = ('level_type_str = %s' % (str(level_type_str),)) >>> print(result) """ if TypedSequence is None: TypedSequence = type_sequence_factory() # For a pure bottom level list return the length # if not any(map(util_type.is_listlike, sequence)) or (isinstance(sequence, np.ndarray) and sequence.dtype != object): if not util_type.is_listlike(sequence) or ( isinstance(sequence, np.ndarray) and sequence.dtype != object ): # Scalar / ndarray type if type(sequence) == 'numpy.ndarray': subtype_list = '[%s]' % (sequence.dtype,) else: subtype_list = None return TypedSequence(type(sequence), subtype_list) elif util_type.is_listlike(sequence): # Sequence type sequence_type = type(sequence) subtype_list = [] for item in sequence: item_type_profile = type_profile2(item, TypedSequence=TypedSequence) subtype_list.append(item_type_profile) sequence_type_profile = TypedSequence(sequence_type, subtype_list) return sequence_type_profile
# level_type_str = typename + '(' + str(level_type_str) + ')' # return level_type_str
[docs]def type_sequence_factory(): from utool import util_dev class TypedSequence(util_dev.NiceRepr): def __init__(self, type_, subtype_list=None): self.type_ = type_ self.subtype_list = subtype_list def __eq__(self, other): return str(self) == str(other) def type_str(self): type_str = six.text_type(self.type_) type_str = type_str.replace("'>", '') type_str = type_str.replace("<type '", '') type_str = type_str.replace("<class '", '') type_str = type_str.replace('numpy.', '') return type_str def subtype_str(self): import utool as ut if self.subtype_list is None: return '' elif isinstance(self.subtype_list, six.string_types): return self.subtype_list else: prev = None grouped = [] group = [] for item in self.subtype_list: if item == prev or prev is None: group.append(item) else: grouped.append(group) group = [item] prev = item grouped.append(group) if len(grouped) == len(self.subtype_list): toret = ( '(' + ', '.join([sub.__nice__()[1:] for sub in self.subtype_list]) + ')' ) else: compressed_types = ut.take_column(grouped, 0) compressed_lens = ut.lmap(len, grouped) zip(compressed_types, compressed_lens) groupstrs = [ sub.__nice__()[1:] + '*' + str(num) if num > 1 else sub.__nice__()[1:] for sub, num in zip(compressed_types, compressed_lens) ] toret = '(' + ', '.join(groupstrs) + ')' return toret def __nice__(self): type_str = self.type_str() if self.subtype_list is not None: type_str += self.subtype_str() return ' ' + type_str return TypedSequence
type_profile = list_type_profile
[docs]def list_cover(list1, list2): r""" returns boolean for each position in list1 if it is in list2 Args: list1 (list): list2 (list): Returns: list: incover_list - true where list1 intersects list2 CommandLine: python -m utool.util_list --test-list_cover Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> # build test data >>> list1 = [1, 2, 3, 4, 5, 6] >>> list2 = [2, 3, 6] >>> # execute function >>> incover_list = list_cover(list1, list2) >>> # verify results >>> result = str(incover_list) >>> print(result) [False, True, True, False, False, True] """ set2 = set(list2) incover_list = [item1 in set2 for item1 in list1] return incover_list
[docs]def and_lists(*args): # [all(tup) for tup in zip(*args)] return list(util_iter.and_iters(*args))
[docs]def xor_lists(*args): r""" Returns: list: CommandLine: python -m utool.util_list --test-xor_lists Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> args = ([True, False, False, True], [True, True, False, False]) >>> result = xor_lists(*args) >>> print(result) [False, True, False, True] """ return [reduce(operator.xor, tup) for tup in zip(*args)]
[docs]def not_list(flag_list): return [not flag for flag in flag_list]
[docs]def or_lists(*args): return [any(tup) for tup in zip(*args)]
[docs]def make_sortby_func(item_list, reverse=False): sortxs_ = argsort(item_list) sortxs = sortxs_[::-1] if reverse else sortxs_ def sortby_func(list_): return take(list_, sortxs) return sortby_func
[docs]def filter_startswith(list_, str_): def item_startswith(item): return item.startswith(str_) return list(filter(item_startswith, list_))
[docs]def list_roll(list_, n): r""" Like numpy.roll for python lists Args: list\_ (list): n (int): Returns: list: References: http://stackoverflow.com/questions/9457832/python-list-rotation Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [1, 2, 3, 4, 5] >>> n = 2 >>> result = list_roll(list_, n) >>> print(result) [4, 5, 1, 2, 3] Ignore: np.roll(list\_, n) """ return list_[-n:] + list_[:-n]
[docs]def list_argmax(list_): return np.argmax(np.array(list_))
[docs]def list_argmaxima(list_): r""" Args: list_ (list): Returns: list: argmaxima CommandLine: python -m utool.util_list --exec-list_argmaxima Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = np.array([1, 2, 3, 3, 3, 2, 1]) >>> argmaxima = list_argmaxima(list_) >>> result = ('argmaxima = %s' % (str(argmaxima),)) >>> print(result) argmaxima = [2 3 4] """ argmax = list_argmax(list_) maxval = list_[argmax] argmaxima = np.where((np.isclose(maxval, list_)))[0] return argmaxima
[docs]def make_index_lookup(list_, dict_factory=dict): r""" Args: list_ (list): assumed to have unique items Returns: dict: mapping from item to index CommandLine: python -m utool.util_list --exec-make_index_lookup Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> list_ = [5, 3, 8, 2] >>> idx2_item = ut.make_index_lookup(list_) >>> result = ut.repr2(idx2_item, nl=False) >>> assert ut.dict_take(idx2_item, list_) == list(range(len(list_))) >>> print(result) {2: 3, 3: 1, 5: 0, 8: 2} """ return dict_factory(zip(list_, range(len(list_))))
[docs]def list_alignment(list1, list2, missing=False): """ Assumes list items are unique Args: list1 (list): a list of unique items to be aligned list2 (list): a list of unique items in a desired ordering missing (bool): True if list2 can contain items not in list1 Returns: list: sorting that will map list1 onto list2 CommandLine: python -m utool.util_list list_alignment Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> list1 = ['b', 'c', 'a'] >>> list2 = ['a', 'b', 'c'] >>> sortx = list_alignment(list1, list2) >>> list1_aligned = take(list1, sortx) >>> assert list1_aligned == list2 Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> list1 = ['b', 'c', 'a'] >>> list2 = ['a', 'a2', 'b', 'c', 'd'] >>> sortx = ut.list_alignment(list1, list2, missing=True) >>> print('sortx = %r' % (sortx,)) >>> list1_aligned = ut.none_take(list1, sortx) >>> result = ('list1_aligned = %s' % (ut.repr2(list1_aligned),)) >>> print(result) list1_aligned = ['a', None, 'b', 'c', None] """ import utool as ut item1_to_idx = make_index_lookup(list1) if missing: sortx = ut.dict_take(item1_to_idx, list2, None) else: sortx = ut.take(item1_to_idx, list2) return sortx
[docs]def unique_inverse(item_list): """ Like np.unique(item_list, return_inverse=True) """ import utool as ut unique_items = ut.unique(item_list) inverse = list_alignment(unique_items, item_list) return unique_items, inverse
[docs]def list_transpose(list_, shape=None): r""" Swaps rows and columns. nCols should be specified if the initial list is empty. Args: list_ (list): Returns: list: CommandLine: python -m utool.util_list --test-list_transpose Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [[1, 2], [3, 4]] >>> result = list_transpose(list_) >>> print(result) [(1, 3), (2, 4)] Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [] >>> result = list_transpose(list_, shape=(0, 5)) >>> print(result) [[], [], [], [], []] Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [[], [], [], [], []] >>> result = list_transpose(list_) >>> print(result) [] Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> list_ = [[1, 2, 3], [3, 4]] >>> ut.assert_raises(ValueError, list_transpose, list_) """ num_cols_set = unique([len(x) for x in list_]) if shape is None: if len(num_cols_set) == 0: raise ValueError('listT does not support empty transpose without shapes') else: assert len(shape) == 2, 'shape must be a 2-tuple' if len(num_cols_set) == 0: return [[] for _ in range(shape[1])] elif num_cols_set[0] == 0: return [] if len(num_cols_set) != 1: raise ValueError('inconsistent column lengths=%r' % (num_cols_set,)) return list(zip(*list_))
listT = list_transpose
[docs]def delete_items_by_index(list_, index_list, copy=False): """ Remove items from ``list_`` at positions specified in ``index_list`` The original ``list_`` is preserved if ``copy`` is True Args: list_ (list): index_list (list): copy (bool): preserves original list if True Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> list_ = [8, 1, 8, 1, 6, 6, 3, 4, 4, 5, 6] >>> index_list = [2, -1] >>> result = delete_items_by_index(list_, index_list) >>> print(result) [8, 1, 1, 6, 6, 3, 4, 4, 5] """ if copy: list_ = list_[:] # Rectify negative indicies index_list_ = [(len(list_) + x if x < 0 else x) for x in index_list] # Remove largest indicies first index_list_ = sorted(index_list_, reverse=True) for index in index_list_: del list_[index] return list_
[docs]def delete_list_items(list_, item_list, copy=False): r""" Remove items in ``item_list`` from ``list_``. The original ``list_`` is preserved if ``copy`` is True """ if copy: list_ = list_[:] for item in item_list: list_.remove(item) return list_
[docs]def unflat_map(func, unflat_items, vectorized=False, **kwargs): r""" Uses an wbia lookup function with a non-flat rowid list. In essence this is equivilent to [list(map(func, _items)) for _items in unflat_items]. The utility of this function is that it only calls method once. This is more efficient for calls that can take a list of inputs Args: func (func): function unflat_items (list): list of rowid lists Returns: list of values: unflat_vals CommandLine: python -m utool.util_list --test-unflat_map Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> vectorized = False >>> kwargs = {} >>> def func(x): >>> return x + 1 >>> unflat_items = [[], [1, 2, 3], [4, 5], [6, 7, 8, 9], [], []] >>> unflat_vals = unflat_map(func, unflat_items) >>> result = str(unflat_vals) >>> print(result) [[], [2, 3, 4], [5, 6], [7, 8, 9, 10], [], []] """ import utool as ut # First flatten the list, and remember the original dimensions flat_items, reverse_list = ut.invertible_flatten2(unflat_items) # Then preform the lookup / implicit mapping if vectorized: # func is vectorized flat_vals = func(flat_items, **kwargs) else: flat_vals = [func(item, **kwargs) for item in flat_items] if True: assert len(flat_vals) == len( flat_items ), 'flat lens not the same, len(flat_vals)=%d len(flat_items)=%d' % ( len(flat_vals), len(flat_items), ) # Then ut.unflatten2 the results to the original input dimensions unflat_vals = ut.unflatten2(flat_vals, reverse_list) if True: assert len(unflat_vals) == len( unflat_items ), 'unflat lens not the same, len(unflat_vals)=%d len(unflat_rowids)=%d' % ( len(unflat_vals), len(unflat_items), ) return unflat_vals
[docs]def unflat_vecmap(func, unflat_items, vectorized=False, **kwargs): """unflat map for vectorized functions""" import utool as ut # First flatten the list, and remember the original dimensions flat_items, reverse_list = ut.invertible_flatten2(unflat_items) # Then preform the lookup / implicit mapping flat_vals = func(flat_items, **kwargs) if True: assert len(flat_vals) == len( flat_items ), 'flat lens not the same, len(flat_vals)=%d len(flat_items)=%d' % ( len(flat_vals), len(flat_items), ) # Then ut.unflatten2 the results to the original input dimensions unflat_vals = ut.unflatten2(flat_vals, reverse_list) if True: assert len(unflat_vals) == len( unflat_items ), 'unflat lens not the same, len(unflat_vals)=%d len(unflat_rowids)=%d' % ( len(unflat_vals), len(unflat_items), ) return unflat_vals
[docs]def list_getattr(list_, attrname): return list(map(operator.attrgetter(attrname), list_))
[docs]def list_reshape(list_, new_shape, trail=False): r""" reshapes leaving trailing dimnsions in front if prod(new_shape) != len(list\_) Args: list\_ (list): new_shape (tuple): Returns: list CommandLine: python -m utool.util_list --exec-list_reshape --show Example: >>> # ENABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> import utool as ut >>> import numpy as np >>> list_ = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] >>> new_shape = (2, 2, 3) >>> newlist = list_reshape(list_, new_shape) >>> depth = ut.depth_profile(newlist) >>> result = ('list_ = %s' % (ut.repr2(newlist, nl=1),)) >>> print('depth = %r' % (depth,)) >>> print(result) >>> newlist2 = np.reshape(list_, depth).tolist() >>> ut.assert_eq(newlist, newlist2) """ if not trail: total = reduce(operator.mul, new_shape) assert total == len(list_) newlist = list_ for dim in reversed(new_shape): slice_ = (newlist[i::dim] for i in range(dim)) newlist = list(map(list, zip(*slice_))) if not trail: newlist = newlist[0] return newlist
[docs]def index_to_boolmask(indices, maxval=None): r""" Constructs a list of booleans where an item is True if its position is in `indices` otherwise it is False. Args: indices (list): list of integer indices maxval (int): length of the returned list. If not specified this is inverred from `indices` Returns: list: mask: list of booleans. mask[idx] is True if idx in indices SeeAlso: vt.index_to_boolmask numpy version CommandLine: python -m vtool.other index_to_boolmask Example: >>> # ENABLE_DOCTEST >>> import utool as ut >>> indices = [0, 1, 4] >>> maxval = 5 >>> mask = ut.index_to_boolmask(indices, maxval) >>> assert mask == [True, True, False, False, True] """ if maxval is None: indices = list(indices) maxval = max(indices) + 1 mask = [False] * maxval for index in indices: mask[index] = True return mask
# Aliases list_compress = compress list_ziptake = ziptake list_zipcompress = zipcompress list_where = where list_take = take list_argsort = argsort bzip = broadcast_zip
[docs]def list_strip(list_, to_strip, left=True, right=True): r""" list\_ = [1, 2, 1, 3, 1, 1] to_strip = 1 stripped_list = ut.list_strip(list\_, to_strip) """ import utool as ut flags = [item != to_strip for item in list_] flag_lists = [] if right: rstrip_flags = ut.cumsum(flags[::-1])[::-1] flag_lists.append(rstrip_flags) if left: lstrip_flags = ut.cumsum(flags) flag_lists.append(lstrip_flags) strip_flags = ut.and_lists(*flag_lists) stripped_list = ut.compress(list_, strip_flags) return stripped_list
[docs]def insert_values(list_, index, values, inplace=False): if inplace: assert False del list_[index] for new in values[::-1]: list_.insert(index, new) else: left_part = list_[:index] right_part = list_[index + 1 :] new_list = left_part + values + right_part return new_list
[docs]def aslist(sequence): r""" Ensures that the sequence object is a Python list. Handles, numpy arrays, and python sequences (e.g. tuples, and iterables). Args: sequence (sequence): a list-like object Returns: list: `sequence` as a Python list Example: >>> # DISABLE_DOCTEST >>> from utool.util_list import * # NOQA >>> s1 = [1, 2, 3] >>> s2 = (1, 2, 3) >>> assert aslist(s1) is s1 >>> assert aslist(s2) is not s2 >>> aslist(np.array([[1, 2], [3, 4], [5, 6]])) [[1, 2], [3, 4], [5, 6]] >>> aslist(range(3)) [0, 1, 2] """ if isinstance(sequence, list): return sequence elif util_type.HAVE_NUMPY and isinstance(sequence, np.ndarray): list_ = sequence.tolist() else: list_ = list(sequence) return list_
# def partition2(list_, idxs1, idxs2): # list1_ = ut.take(list_, idxs1) # list2_ = list(zip(ut.take(list_, idxs2))) # partitioned_items = [list1_, list2_] # return partitioned_items # class ListLike(object): # """ # similar to DictLike # """ # def getitem(self, index): # raise NotImplementedError('abstract getitem function') # def setitem(self, index, value): # raise NotImplementedError('abstract setitem function') # def append(self, value): # raise NotImplementedError('Unfinished') # def insert(self, index, value): # raise NotImplementedError('Unfinished') # def extend(self, values): # raise NotImplementedError('Unfinished') # def pop(self, value): # raise NotImplementedError('Unfinished') # def remove(self, value): # raise NotImplementedError('Unfinished') # def reverse(self, value): # raise NotImplementedError('Unfinished') # def sort(self, value): # raise NotImplementedError('Unfinished') # def aslist(self): # return [self[idx] for idx in range(len(self))] # def __repr__(self): # return repr(self.aslist()) # def __str__(self): # return str(self.aslist()) # def __len__(self): # return len(list(self.keys())) # def __contains__(self, key): # return key in self.aslist() # def __getitem__(self, key): # return self.getitem(key) # def __setitem__(self, key, value): # return self.setitem(key, value)
[docs]def length_hint(obj, default=0): """ Return an estimate of the number of items in obj. This is the PEP 424 implementation. If the object supports len(), the result will be exact. Otherwise, it may over- or under-estimate by an arbitrary amount. The result will be an integer >= 0. """ try: return len(obj) except TypeError: try: get_hint = type(obj).__length_hint__ except AttributeError: return default try: hint = get_hint(obj) except TypeError: return default if hint is NotImplemented: return default if not isinstance(hint, int): raise TypeError('Length hint must be an integer, not %r' % type(hint)) if hint < 0: raise ValueError('__length_hint__() should return >= 0') return hint
[docs]def partial_order(list_, part): list_items = set(list_) part_items = set(part) begin = [p for p in part if p in list_items] end = [item for item in list_ if item not in part_items] return begin + end
if __name__ == '__main__': """ CommandLine: python -c "import utool, utool.util_list; utool.doctest_funcs(utool.util_list, allexamples=True)" python -c "import utool, utool.util_list; utool.doctest_funcs(utool.util_list)" python -m utool.util_list python -m utool.util_list --allexamples """ import multiprocessing multiprocessing.freeze_support() # for win32 import utool as ut # NOQA ut.doctest_funcs()