Source code for utool.util_iter

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import six
import itertools as it
import functools
import operator
from six.moves import zip, range, zip_longest, reduce
from utool import util_inject
from utool._internal import meta_util_iter

print, rrr, profile = util_inject.inject2(__name__)

ensure_iterable = meta_util_iter.ensure_iterable
isiterable = meta_util_iter.isiterable
isscalar = meta_util_iter.isscalar


[docs]def wrap_iterable(obj): """ Returns: wrapped_obj, was_scalar """ was_scalar = not isiterable(obj) wrapped_obj = [obj] if was_scalar else obj return wrapped_obj, was_scalar
[docs]def next_counter(start=0, step=1): r""" Args: start (int): (default = 0) step (int): (default = 1) Returns: func: next_ CommandLine: python -m utool.util_iter --test-next_counter Example: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> start = 1 >>> step = 1 >>> next_ = next_counter(start, step) >>> result = str([next_(), next_(), next_()]) >>> print(result) [1, 2, 3] """ count_gen = it.count(start, step) next_ = functools.partial(six.next, count_gen) return next_
[docs]def evaluate_generator(iter_): """ for evaluating each item in a generator and ignoring output """ for _ in iter_: # NOQA pass
# TODO: check if faster # try: # while True: # six.next(iter_) # except StopIteration: # pass
[docs]def itake_column(list_, colx): """ iterator version of get_list_column """ if isinstance(colx, list): # multi select return ([row[colx_] for colx_ in colx] for row in list_) else: return (row[colx] for row in list_)
iget_list_column = itake_column
[docs]def iget_list_column_slice(list_, start=None, stop=None, stride=None): """ iterator version of get_list_column """ if isinstance(start, slice): slice_ = start else: slice_ = slice(start, stop, stride) return (row[slice_] for row in list_)
[docs]def iter_window(iterable, size=2, step=1, wrap=False): r""" iterates through iterable with a window size generalizeation of itertwo Args: iterable (iter): an iterable sequence size (int): window size (default = 2) wrap (bool): wraparound (default = False) Returns: iter: returns windows in a sequence CommandLine: python -m utool.util_iter --exec-iter_window Example: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> iterable = [1, 2, 3, 4, 5, 6] >>> size, step, wrap = 3, 1, True >>> window_iter = iter_window(iterable, size, step, wrap) >>> window_list = list(window_iter) >>> result = ('window_list = %r' % (window_list,)) >>> print(result) window_list = [(1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6), (5, 6, 1), (6, 1, 2)] Example: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> iterable = [1, 2, 3, 4, 5, 6] >>> size, step, wrap = 3, 2, True >>> window_iter = iter_window(iterable, size, step, wrap) >>> window_list = list(window_iter) >>> result = ('window_list = %r' % (window_list,)) >>> print(result) window_list = [(1, 2, 3), (3, 4, 5), (5, 6, 1)] """ # it.tee may be slow, but works on all iterables iter_list = it.tee(iterable, size) if wrap: # Secondary iterables need to be cycled for wraparound iter_list = [iter_list[0]] + list(map(it.cycle, iter_list[1:])) # Step each iterator the approprate number of times try: for count, iter_ in enumerate(iter_list[1:], start=1): for _ in range(count): six.next(iter_) except StopIteration: return iter(()) else: _window_iter = zip(*iter_list) # Account for the step size window_iter = it.islice(_window_iter, 0, None, step) return window_iter
[docs]def itertwo(iterable, wrap=False): r""" equivalent to iter_window(iterable, 2, 1, wrap) Args: iterable (iter): an iterable sequence wrap (bool): if True, returns with wraparound Returns: iter: returns edges in a sequence CommandLine: python -m utool.util_iter --test-itertwo Example0: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> iterable = [1, 2, 3, 4] >>> wrap = False >>> edges = list(itertwo(iterable, wrap)) >>> result = ('edges = %r' % (edges,)) >>> print(result) edges = [(1, 2), (2, 3), (3, 4)] Example1: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> iterable = [1, 2, 3, 4] >>> wrap = True >>> edges = list(itertwo(iterable, wrap)) >>> result = ('edges = %r' % (edges,)) >>> print(result) edges = [(1, 2), (2, 3), (3, 4), (4, 1)] Example2: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> import utool as ut >>> iterable = iter([1, 2, 3, 4]) >>> wrap = False >>> edge_iter = itertwo(iterable, wrap) >>> edges = list(edge_iter) >>> result = ('edges = %r' % (edges,)) >>> ut.assert_eq(len(list(iterable)), 0, 'iterable should have been used up') >>> print(result) edges = [(1, 2), (2, 3), (3, 4)] Ignore: >>> # BENCHMARK >>> import random >>> import numpy as np >>> rng = random.Random(0) >>> iterable = [rng.randint(0, 2000) for _ in range(100000)] >>> iterable2 = np.array(iterable) >>> # >>> import ubelt as ub >>> ti = ub.Timerit(100, bestof=10, verbose=2) >>> # >>> for timer in ti.reset('list-zip'): >>> with timer: >>> list(zip(iterable, iterable[1:])) >>> # >>> for timer in ti.reset('list-itertwo'): >>> with timer: >>> list(itertwo(iterable)) >>> # >>> for timer in ti.reset('iter_window(2)'): >>> with timer: >>> list(ub.iter_window(iterable, 2)) >>> # >>> for timer in ti.reset('list-zip-numpy'): >>> with timer: >>> list(zip(iterable2, iterable2[1:])) >>> # >>> for timer in ti.reset('list-zip-numpy.tolist'): >>> with timer: >>> list(zip(iterable2.tolist(), iterable2.tolist()[1:])) >>> # >>> for timer in ti.reset('list-itertwo-numpy'): >>> with timer: >>> list(itertwo(iterable2)) >>> # >>> for timer in ti.reset('list-itertwo-numpy-tolist'): >>> with timer: >>> list(itertwo(iterable2.tolist())) >>> # >>> print(ub.repr2(ti.rankings)) >>> print(ub.repr2(ti.consistency)) """ # it.tee may be slow, but works on all iterables iter1, iter2 = it.tee(iterable, 2) if wrap: iter2 = it.cycle(iter2) try: six.next(iter2) except StopIteration: return iter(()) else: return zip(iter1, iter2)
[docs]def iter_compress(item_iter, flag_iter): """ iter_compress - like numpy compress Args: item_iter (list): flag_iter (list): of bools Returns: list: true_items Example: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> item_iter = [1, 2, 3, 4, 5] >>> flag_iter = [False, True, True, False, True] >>> true_items = iter_compress(item_iter, flag_iter) >>> result = list(true_items) >>> print(result) [2, 3, 5] """ # TODO: Just use it.compress true_items = (item for (item, flag) in zip(item_iter, flag_iter) if flag) return true_items
ifilter_items = iter_compress
[docs]def ifilterfalse_items(item_iter, flag_iter): """ ifilterfalse_items Args: item_iter (list): flag_iter (list): of bools Example: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> item_iter = [1, 2, 3, 4, 5] >>> flag_iter = [False, True, True, False, True] >>> false_items = ifilterfalse_items(item_iter, flag_iter) >>> result = list(false_items) >>> print(result) [1, 4] """ false_items = (item for (item, flag) in zip(item_iter, flag_iter) if not flag) return false_items
[docs]def ifilter_Nones(iter_): """ Removes any nones from the iterable """ return (item for item in iter_ if item is not None)
[docs]def iflatten(list_): r""" flattens a list iteratively """ # very fast flatten flat_iter = it.chain.from_iterable(list_) return flat_iter
[docs]def iter_multichunks(iterable, chunksizes, bordermode=None): """ CommandLine: python -m utool.util_iter --test-iter_multichunks Example: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> import utool as ut >>> iterable = list(range(20)) >>> chunksizes = (3, 2, 3) >>> bordermode = 'cycle' >>> genresult = iter_multichunks(iterable, chunksizes, bordermode) >>> multichunks = list(genresult) >>> depthprofile = ut.depth_profile(multichunks) >>> assert depthprofile[1:] == chunksizes, 'did not generate chunks correctly' >>> result = ut.repr4(list(map(str, multichunks)), nobr=True) >>> print(result) '[[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]], [[12, 13, 14], [15, 16, 17]]]', '[[[18, 19, 0], [1, 2, 3]], [[4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15]]]', Example1: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> import utool as ut >>> iterable = list(range(7)) >>> # when chunksizes is len == 1, then equlivalent to ichunks >>> chunksizes = (3,) >>> bordermode = 'cycle' >>> genresult = iter_multichunks(iterable, chunksizes, bordermode) >>> multichunks = list(genresult) >>> depthprofile = ut.depth_profile(multichunks) >>> assert depthprofile[1:] == chunksizes, 'did not generate chunks correctly' >>> result = str(multichunks) >>> print(result) [[0, 1, 2], [3, 4, 5], [6, 0, 1]] """ chunksize = reduce(operator.mul, chunksizes) for chunk in ichunks(iterable, chunksize, bordermode=bordermode): reshaped_chunk = chunk for d in chunksizes[1:][::-1]: reshaped_chunk = list(ichunks(reshaped_chunk, d)) yield reshaped_chunk
[docs]def ichunks(iterable, chunksize, bordermode=None): r""" generates successive n-sized chunks from ``iterable``. Args: iterable (list): input to iterate over chunksize (int): size of sublist to return bordermode (str): None, 'cycle', or 'replicate' References: http://stackoverflow.com/questions/434287/iterate-over-a-list-in-chunks SeeAlso: util_progress.get_num_chunks CommandLine: python -m utool.util_iter --exec-ichunks --show Timeit: >>> import utool as ut >>> setup = ut.codeblock(''' from utool.util_iter import * # NOQA iterable = list(range(100)) chunksize = 8 ''') >>> stmt_list = [ ... 'list(ichunks(iterable, chunksize))', ... 'list(ichunks_noborder(iterable, chunksize))', ... 'list(ichunks_list(iterable, chunksize))', ... ] >>> (passed, times, results) = ut.timeit_compare(stmt_list, setup) Example: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> iterable = [1, 2, 3, 4, 5, 6, 7] >>> chunksize = 3 >>> genresult = ichunks(iterable, chunksize) >>> result = list(genresult) >>> print(result) [[1, 2, 3], [4, 5, 6], [7]] Example: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> iterable = (1, 2, 3, 4, 5, 6, 7) >>> chunksize = 3 >>> bordermode = 'cycle' >>> genresult = ichunks(iterable, chunksize, bordermode) >>> result = list(genresult) >>> print(result) [[1, 2, 3], [4, 5, 6], [7, 1, 2]] Example: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> iterable = (1, 2, 3, 4, 5, 6, 7) >>> chunksize = 3 >>> bordermode = 'replicate' >>> genresult = ichunks(iterable, chunksize, bordermode) >>> result = list(genresult) >>> print(result) [[1, 2, 3], [4, 5, 6], [7, 7, 7]] """ if bordermode is None: return ichunks_noborder(iterable, chunksize) elif bordermode == 'cycle': return ichunks_cycle(iterable, chunksize) elif bordermode == 'replicate': return ichunks_replicate(iterable, chunksize) else: raise ValueError('unknown bordermode=%r' % (bordermode,))
[docs]def ichunks_noborder(iterable, chunksize): # feed the same iter to zip_longest multiple times, this causes it to # consume successive values of the same sequence rather than striped values sentinal = object() copied_iterators = [iter(iterable)] * chunksize chunks_with_sentinals = zip_longest(*copied_iterators, fillvalue=sentinal) # Yeild smaller chunks without sentinals for chunk in chunks_with_sentinals: if len(chunk) > 0: yield [item for item in chunk if item is not sentinal]
[docs]def ichunks_cycle(iterable, chunksize): # feed the same iter to zip_longest multiple times, this causes it to # consume successive values of the same sequence rather than striped values sentinal = object() copied_iterators = [iter(iterable)] * chunksize chunks_with_sentinals = zip_longest(*copied_iterators, fillvalue=sentinal) bordervalues = it.cycle(iter(iterable)) # Yeild smaller chunks without sentinals for chunk in chunks_with_sentinals: if len(chunk) > 0: yield [ item if item is not sentinal else six.next(bordervalues) for item in chunk ]
[docs]def ichunks_replicate(iterable, chunksize): # feed the same iter to zip_longest multiple times, this causes it to # consume successive values of the same sequence rather than striped values sentinal = object() copied_iterators = [iter(iterable)] * chunksize chunks_with_sentinals = zip_longest(*copied_iterators, fillvalue=sentinal) # Yeild smaller chunks without sentinals for chunk in chunks_with_sentinals: if len(chunk) > 0: filtered_chunk = [item for item in chunk if item is not sentinal] if len(filtered_chunk) == chunksize: yield filtered_chunk else: sizediff = chunksize - len(filtered_chunk) padded_chunk = filtered_chunk + [filtered_chunk[-1]] * sizediff yield padded_chunk
[docs]def ichunks_list(list_, chunksize): """ input must be a list. SeeAlso: ichunks References: http://stackoverflow.com/questions/434287/iterate-over-a-list-in-chunks """ return (list_[ix : ix + chunksize] for ix in range(0, len(list_), chunksize))
# return (list_[sl] for sl in ichunk_slices(len(list_), chunksize))
[docs]def ichunk_slices(total, chunksize): for ix in range(0, total, chunksize): yield slice(ix, ix + chunksize)
[docs]def interleave(args): r""" zip followed by flatten Args: args (tuple): tuple of lists to interleave SeeAlso: You may actually be better off doing something like this: a, b, = args ut.flatten(ut.bzip(a, b)) ut.flatten(ut.bzip([1, 2, 3], ['-'])) [1, '-', 2, '-', 3, '-'] Example: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> import utool as ut >>> args = ([1, 2, 3, 4, 5], ['A', 'B', 'C', 'D', 'E', 'F', 'G']) >>> genresult = interleave(args) >>> result = ut.repr4(list(genresult), nl=False) >>> print(result) [1, 'A', 2, 'B', 3, 'C', 4, 'D', 5, 'E'] """ arg_iters = list(map(iter, args)) cycle_iter = it.cycle(arg_iters) for iter_ in cycle_iter: try: yield six.next(iter_) except StopIteration: return
[docs]def and_iters(*args): return (all(tup) for tup in zip(*args))
[docs]def random_product(items, num=None, rng=None): """ Yields `num` items from the cartesian product of items in a random order. Args: items (list of sequences): items to get caresian product of packed in a list or tuple. (note this deviates from api of it.product) Example: import utool as ut items = [(1, 2, 3), (4, 5, 6, 7)] rng = 0 list(ut.random_product(items, rng=0)) list(ut.random_product(items, num=3, rng=0)) """ import utool as ut rng = ut.ensure_rng(rng, 'python') seen = set() items = [list(g) for g in items] max_num = ut.prod(map(len, items)) if num is None: num = max_num if num > max_num: raise ValueError('num exceedes maximum number of products') # TODO: make this more efficient when num is large if num > max_num // 2: for prod in ut.shuffle(list(it.product(*items)), rng=rng): yield prod else: while len(seen) < num: # combo = tuple(sorted(rng.choice(items, size, replace=False))) idxs = tuple(rng.randint(0, len(g) - 1) for g in items) if idxs not in seen: seen.add(idxs) prod = tuple(g[x] for g, x in zip(items, idxs)) yield prod
[docs]def random_combinations(items, size, num=None, rng=None): """ Yields `num` combinations of length `size` from items in random order Args: items (?): size (?): num (None): (default = None) rng (RandomState): random number generator(default = None) Yields: tuple: combo CommandLine: python -m utool.util_iter random_combinations Ignore: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> import utool as ut >>> items = list(range(10)) >>> size = 3 >>> num = 5 >>> rng = 0 >>> combos = list(random_combinations(items, size, num, rng)) >>> result = ('combos = %s' % (ut.repr2(combos),)) >>> print(result) Ignore: >>> # ENABLE_DOCTEST >>> from utool.util_iter import * # NOQA >>> import utool as ut >>> items = list(zip(range(10), range(10))) >>> size = 3 >>> num = 5 >>> rng = 0 >>> combos = list(random_combinations(items, size, num, rng)) >>> result = ('combos = %s' % (ut.repr2(combos),)) >>> print(result) """ import scipy.special import numpy as np import utool as ut rng = ut.ensure_rng(rng, impl='python') num_ = np.inf if num is None else num # Ensure we dont request more than is possible n_max = int(scipy.special.comb(len(items), size)) num_ = min(n_max, num_) if num is not None and num_ > n_max // 2: # If num is too big just generate all combinations and shuffle them combos = list(it.combinations(items, size)) rng.shuffle(combos) for combo in combos[:num]: yield combo else: # Otherwise yield randomly until we get something we havent seen items = list(items) combos = set() while len(combos) < num_: # combo = tuple(sorted(rng.choice(items, size, replace=False))) combo = tuple(sorted(rng.sample(items, size))) if combo not in combos: # TODO: store indices instead of combo values combos.add(combo) yield combo
if __name__ == '__main__': """ CommandLine: python -m utool.util_iter python -m utool.util_iter --allexamples """ import multiprocessing multiprocessing.freeze_support() # for win32 import utool as ut # NOQA ut.doctest_funcs()