# Source code for utool.util_parallel

# -*- coding: utf-8 -*-
"""
Module to executes the same function with different arguments in parallel.
"""
from __future__ import absolute_import, division, print_function
import multiprocessing
import atexit
#import sys
import signal
import ctypes
import six
import threading
from six.moves import map, range, zip  # NOQA
from utool._internal.meta_util_six import get_funcname
from utool import util_progress
from utool import util_time
from utool import util_arg
from utool import util_dbg
from utool import util_inject
from utool import util_cplat
if six.PY2:
    import thread as _thread
    import Queue as queue
elif six.PY3:
    import _thread
    import queue
util_inject.noinject('[parallel]')

# Module-level verbosity / behavior flags, all parsed from the command line
# via utool's util_arg helpers at import time.
QUIET   = util_arg.QUIET
SILENT  = util_arg.SILENT
VERBOSE_PARALLEL, VERYVERBOSE_PARALLEL = util_arg.get_module_verbosity_flags('par', 'parallel')
#VERBOSE_PARALLEL = util_arg.VERBOSE or util_arg.get_argflag(('--verbose-par', '--verbpar', '--verbose-parallel', '--verbparallel'))
#VERYVERBOSE_PARALLEL = util_arg.VERYVERBOSE or util_arg.get_argflag(('--veryverbose-par', '--veryverbpar', '--veryverbose-parallel', '--veryverbparallel'))
STRICT  = util_arg.STRICT

if SILENT:
    # Silence all output from this module by shadowing the builtin print
    # with a no-op.  NOTE: only takes a single positional argument, which
    # matches how print is called throughout this module.
    def print(msg):
        pass

# Module-global pool handle: None (uninitialized), 1 (serial mode), or a
# multiprocessing.Pool instance.
__POOL__ = None
#__EAGER_JOIN__      = util_arg.get_argflag('--eager-join')
# When True (the default), pools are closed/joined as soon as a parallel
# map finishes instead of being kept open for reuse.
__EAGER_JOIN__      = not util_arg.get_argflag('--noclose-pool')

# Optional --num-procs override for the worker count; None means "derive
# from cpu_count" (see get_default_numprocs).
__NUM_PROCS__       = util_arg.get_argval('--num-procs', int, default=None)
# Force every parallel helper in this module to run serially.
__FORCE_SERIAL__    = util_arg.get_argflag(
    ('--utool-force-serial', '--force-serial', '--serial'))
#__FORCE_SERIAL__    = True
# If a parallel generator raises, retry the whole job serially.
__SERIAL_FALLBACK__ = not util_arg.get_argflag('--noserial-fallback')
# Time the generate helpers (implied by verbose parallel mode).
__TIME_GENERATE__   = VERBOSE_PARALLEL or util_arg.get_argflag('--time-generate')

# Maybe global pooling is not correct?
# When False (default), each parallel call builds (and tears down) its own
# pool instead of sharing the module-global __POOL__.
USE_GLOBAL_POOL = util_arg.get_argflag('--use_global_pool')


# FIXME: running tests in IBEIS has errors when this number is low
# Due to the large number of parallel processes running?
# Jobs with fewer tasks than this are always run serially (pool startup
# overhead dominates for tiny jobs).
MIN_PARALLEL_TASKS = 4
#MIN_PARALLEL_TASKS = 16
if util_cplat.WIN32:
    # Process startup is more expensive on Windows; require more tasks
    # before going parallel.
    MIN_PARALLEL_TASKS = 16


BACKEND = 'multiprocessing'

#TODO:
#    http://dispy.sourceforge.net/

if BACKEND == 'gevent':
    raise NotImplementedError('gevent cannot run on multiple cpus')
    pass
elif BACKEND == 'zeromq':
    # TODO: Implement zeromq backend
    #http://zguide.zeromq.org/py:mtserver
    raise NotImplementedError('no zeromq yet')
    pass
elif BACKEND == 'multiprocessing':
    """
    expecting
    multiprocessing.__file__ = /usr/lib/python2.7/multiprocessing/__init__.pyc
    multiprocessing.__version__ >= 0.70a1

    BUT PIP SAYS:
        INSTALLED: 2.6.2.1 (latest)

    because multiprocessing on pip is:
    Backport of the multiprocessing package to Python 2.4 and 2.5

    ut.editfile(multiprocessing.__file__)
    from multiprocessing.pool import ThreadPool
    """
    def new_pool(num_procs, init_worker, maxtasksperchild):
        # Build a fresh worker pool; init_worker runs once in each child
        # process on startup (used here to ignore SIGINT in workers).
        return multiprocessing.Pool(processes=num_procs,
                                    initializer=init_worker,
                                    maxtasksperchild=maxtasksperchild)
    pass
def set_num_procs(num_procs):
    """Override the module-wide default number of worker processes."""
    global __NUM_PROCS__
    __NUM_PROCS__ = num_procs
def in_main_process():
    """
    Returns True unless executing inside a multiprocessing subprocess.

    Usefull to disable init print messages on windows.
    """
    proc_name = multiprocessing.current_process().name
    return proc_name == 'MainProcess'
def get_default_numprocs():
    """
    Number of worker processes to use by default.

    Honors the ``--num-procs`` override stored in ``__NUM_PROCS__`` when
    present; otherwise leaves one core free (never fewer than 1).
    """
    if __NUM_PROCS__ is not None:
        return __NUM_PROCS__
    #if WIN32:
    #    num_procs = 3  # default windows to 3 processes for now
    #else:
    #    num_procs = max(multiprocessing.cpu_count() - 2, 1)
    return max(multiprocessing.cpu_count() - 1, 1)
def init_worker():
    """Pool initializer: workers ignore SIGINT so the parent handles Ctrl+C."""
    signal.signal(signal.SIGINT, signal.SIG_IGN)
def init_pool(num_procs=None, maxtasksperchild=None, quiet=QUIET, **kwargs):
    """
    Initializes the module-global ``__POOL__``.

    warning: this might not be the right thing to do

    Args:
        num_procs (int): worker count; defaults to get_default_numprocs()
        maxtasksperchild (int): passed through to multiprocessing.Pool
        quiet (bool): suppress status messages

    Returns:
        the global pool object, or the int 1 when running serially
    """
    global __POOL__
    if VERBOSE_PARALLEL:
        print('[util_parallel] init_pool()')
    if num_procs is None:
        # Get number of cpu cores
        num_procs = get_default_numprocs()
    if not quiet:
        print('[util_parallel.init_pool] initializing pool with %d processes' % num_procs)
    if num_procs == 1:
        # Serial mode is signaled by storing the int 1 instead of a Pool.
        print('[util_parallel.init_pool] num_procs=1, Will process in serial')
        __POOL__ = 1
        return __POOL__
    if STRICT:
        # NOTE(review): this asserts the name is truthy, not that it equals
        # 'MainProcess'; it never fails as written.
        assert multiprocessing.current_process().name, (
            'can only initialize from main process')
    if __POOL__ is not None:
        # Refuse to clobber an existing pool; caller must close_pool() first.
        print('[util_parallel.init_pool] close pool before reinitializing')
        return __POOL__
    # Create the pool of processes
    #__POOL__ = multiprocessing.Pool(processes=num_procs,
    #                                initializer=init_worker,
    #                                maxtasksperchild=maxtasksperchild)
    if not USE_GLOBAL_POOL:
        raise AssertionError('Global pool initialization is not allowed')
    __POOL__ = new_pool(num_procs, init_worker, maxtasksperchild)
    return __POOL__
@atexit.register
def close_pool(terminate=False, quiet=QUIET):
    """
    Closes (or terminates) and joins the module-global pool, then resets
    ``__POOL__`` to None.  Registered with atexit so leftover pools are
    cleaned up at interpreter shutdown.

    Args:
        terminate (bool): if True, terminate workers instead of letting
            queued tasks finish
        quiet (bool): suppress status messages
    """
    global __POOL__
    if VERBOSE_PARALLEL:
        print('[util_parallel] close_pool()')
    if __POOL__ is not None:
        if not quiet:
            if terminate:
                print('[util_parallel] terminating pool')
            else:
                print('[util_parallel] closing pool')
        # __POOL__ == 1 means serial mode; there is nothing to close.
        if not isinstance(__POOL__, int):
            # Must join after close to avoid runtime errors
            if not USE_GLOBAL_POOL:
                raise AssertionError(
                    'Global pools are no longer allowed. '
                    'Should be impossible to call this')
            if terminate:
                __POOL__.terminate()
            __POOL__.close()
            __POOL__.join()
        __POOL__ = None
def _process_serial(func, args_list, args_dict={}, nTasks=None, quiet=QUIET):
    """
    Serial process map

    Use generate instead

    NOTE: args_dict is a mutable default argument; it is only read
    (splatted into each call), never mutated, so sharing is benign here.
    """
    if nTasks is None:
        nTasks = len(args_list)
    result_list = []
    prog_iter = util_progress.ProgressIter(
        args_list, nTotal=nTasks, lbl=get_funcname(func) + ': ',
        adjust=True)
    # Execute each task sequentially
    for args in prog_iter:
        result = func(*args, **args_dict)
        result_list.append(result)
    return result_list


def _process_parallel(func, args_list, args_dict={}, nTasks=None, quiet=QUIET,
                      pool=None):
    """
    Parallel process map

    Use generate instead
    """
    # Define progress observers
    if nTasks is None:
        nTasks = len(args_list)
    _prog = util_progress.ProgressIter(
        range(nTasks), nTotal=nTasks, lbl=get_funcname(func) + ': ',
        adjust=True)
    _prog_iter = iter(_prog)
    # Single-element list so the nested callback can mutate the counter
    # (py2-compatible alternative to nonlocal).
    num_tasks_returned_ptr = [0]

    def _callback(result):
        # Runs in the parent process as each async task completes.
        six.next(_prog_iter)
        num_tasks_returned_ptr[0] += 1
    # Send all tasks to be executed asynconously
    apply_results = [pool.apply_async(func, args, args_dict, _callback)
                     for args in args_list]
    # Wait until all tasks have been processed.
    # NOTE(review): this is a busy-wait spin loop; the ap.get() calls below
    # would block appropriately on their own.
    while num_tasks_returned_ptr[0] < nTasks:
        #print('Waiting: ' + str(num_tasks_returned_ptr[0]) + '/' + str(nTasks))
        pass
    # Get the results
    result_list = [ap.get() for ap in apply_results]
    if __EAGER_JOIN__:
        if USE_GLOBAL_POOL:
            close_pool(quiet=quiet)
        else:
            pool.close()
            pool.join()
    return result_list


def _generate_parallel(func, args_list, ordered=True, chunksize=None,
                       prog=True, verbose=True, quiet=QUIET, nTasks=None,
                       **kwargs):
    """
    Parallel process generator

    Yields func(args) for each args in args_list using pool.imap (ordered)
    or pool.imap_unordered.  On failure, optionally falls back to serial
    generation (controlled by __SERIAL_FALLBACK__).
    """
    global __POOL__
    if USE_GLOBAL_POOL:
        global __POOL__
        pool = __POOL__
    else:
        # Maybe global pools are bad?
        # Build a private pool for just this generator.
        pool = new_pool(num_procs=get_default_numprocs(),
                        init_worker=init_worker,
                        maxtasksperchild=None)
        #pool = new_pool()
    prog = prog and verbose
    if nTasks is None:
        nTasks = len(args_list)
    if chunksize is None:
        # Heuristic: small chunks for small jobs, larger for big ones.
        chunksize = max(min(4, nTasks), min(8, nTasks // (pool._processes ** 2)))
    if verbose or VERBOSE_PARALLEL:
        prefix = '[util_parallel._generate_parallel]'
        fmtstr = (prefix +
                  'executing %d %s tasks using %d processes with chunksize=%r')
        print(fmtstr % (nTasks, get_funcname(func), pool._processes, chunksize))
    # NOTE: earlier experiments with a buffered generator were removed;
    # plain pool.imap benchmarked faster here.
    pmap_func = pool.imap if ordered else pool.imap_unordered
    raw_generator = pmap_func(func, args_list, chunksize)
    # Get iterator with or without progress
    if prog:
        result_generator = util_progress.ProgressIter(
            raw_generator, nTotal=nTasks, lbl=get_funcname(func) + ': ',
            freq=kwargs.get('freq', None),
            adjust=kwargs.get('adjust', False))
    else:
        result_generator = raw_generator
    if __TIME_GENERATE__:
        tt = util_time.tic('_generate_parallel func=' + get_funcname(func))
    try:
        # Start generating
        for result in result_generator:
            yield result
        if __EAGER_JOIN__:
            if USE_GLOBAL_POOL:
                close_pool(quiet=quiet)
            else:
                pool.close()
                pool.join()
    except Exception as ex:
        util_dbg.printex(ex, 'Parallel Generation Failed!', '[utool]', tb=True)
        if __EAGER_JOIN__:
            if USE_GLOBAL_POOL:
                close_pool(quiet=quiet)
            else:
                pool.close()
                pool.join()
        print('__SERIAL_FALLBACK__ = %r' % __SERIAL_FALLBACK__)
        if __SERIAL_FALLBACK__:
            print('Trying to handle error by falling back to serial')
            serial_generator = _generate_serial(
                func, args_list, prog=prog, verbose=verbose, nTasks=nTasks,
                **kwargs)
            for result in serial_generator:
                yield result
        else:
            raise
    if __TIME_GENERATE__:
        util_time.toc(tt)


def _generate_serial(func, args_list, prog=True, verbose=True, nTasks=None,
                     **kwargs):
    """ internal serial generator

    Yields func(args) for each args in args_list, in order, with optional
    progress reporting.  Note that func is called with the packed args
    tuple (func(args)), matching pool.imap semantics, not func(*args).
    """
    if nTasks is None:
        nTasks = len(args_list)
    if verbose:
        print('[util_parallel._generate_serial] executing %d %s tasks in serial' %
              (nTasks, get_funcname(func)))
    prog = prog and verbose and nTasks > 1
    # Get iterator with or without progress
    args_iter = (
        util_progress.ProgressIter(args_list, nTotal=nTasks,
                                   lbl=get_funcname(func) + ': ',
                                   freq=kwargs.get('freq', None),
                                   adjust=kwargs.get('adjust', False))
        if prog else args_list
    )
    if __TIME_GENERATE__:
        tt = util_time.tic('_generate_serial func=' + get_funcname(func))
    for args in args_iter:
        result = func(args)
        yield result
    if __TIME_GENERATE__:
        util_time.toc(tt)
def ensure_pool(warn=False, quiet=QUIET):
    """
    Initializes the global pool if it does not yet exist.

    Args:
        warn (bool): print a warning when the pool had to be initialized
        quiet (bool): passed through to init_pool

    Returns:
        the result of init_pool() when initialization was needed,
        otherwise None (the pool already existed).
    """
    global __POOL__
    # BUGFIX: the original used `assert __POOL__ is not None` purely for
    # control flow, which is silently stripped under `python -O` and would
    # skip pool initialization entirely.  Use an explicit check instead.
    if __POOL__ is None:
        if warn:
            # Preserve the exact message the assert-based version printed.
            print('(WARNING) AssertionError: must init_pool() first')
        return init_pool(quiet=quiet)
def generate(func, args_list, ordered=True, force_serial=None, chunksize=None,
             prog=True, verbose=True, quiet=QUIET, nTasks=None, freq=None,
             **kwargs):
    """
    Provides an interfaces to python's multiprocessing module.
    Esentially maps ``args_list`` onto ``func`` using pool.imap.
    Useful for embarrassingly parallel loops. Currently does not work with
    opencv3

    Args:
        func (function): function to apply each argument to
        args_list (list or iter): sequence of tuples which are args for each
            function call
        ordered (bool): yield results in input order (pool.imap) vs as
            completed (pool.imap_unordered)
        force_serial (bool): defaults to the module-wide __FORCE_SERIAL__
        chunksize (int): tasks per worker dispatch; heuristic when None
        prog (bool): show a progress bar
        verbose (bool):
        nTasks (int): optional (must be specified if args_list is an
            iterator)
        freq (int): progress reporting frequency

    Returns:
        generator which yeilds result of applying func to args in args_list

    CommandLine:
        python -m utool.util_parallel --test-generate

    Example0:
        >>> # ENABLE_DOCTEST
        >>> import utool as ut
        >>> #num = 8700  # parallel is slower for smaller numbers
        >>> num = 500  # parallel has an initial (~.1 second startup overhead)
        >>> print('TESTING SERIAL')
        >>> flag_generator0 = ut.generate(ut.is_prime, range(0, num), force_serial=True, freq=num / 4)
        >>> flag_list0 = list(flag_generator0)
        >>> print('TESTING PARALLEL')
        >>> flag_generator1 = ut.generate(ut.is_prime, range(0, num), freq=num / 10)
        >>> flag_list1 = list(flag_generator1)
        >>> print('ASSERTING')
        >>> assert len(flag_list1) == num
        >>> assert flag_list0 == flag_list1

    Note:
        further examples exercising ibeis/vtool integration (chip
        generation, opencv warp freezes) were removed from this docstring;
        see the project test suite.
    """
    if force_serial is None:
        force_serial = __FORCE_SERIAL__
    if nTasks is None:
        nTasks = len(args_list)
    if nTasks == 0:
        if VERBOSE_PARALLEL or verbose:
            print('[util_parallel.generate] submitted 0 tasks')
        # Return an empty iterator so callers can still iterate the result.
        return iter([])
    if VERYVERBOSE_PARALLEL:
        print('[util_parallel.generate] ordered=%r' % ordered)
        print('[util_parallel.generate] force_serial=%r' % force_serial)
    # Check conditions under which we force serial
    force_serial_ = nTasks == 1 or nTasks < MIN_PARALLEL_TASKS or force_serial
    if USE_GLOBAL_POOL:
        if not force_serial_:
            ensure_pool(quiet=quiet)
    # __POOL__ == 1 signals serial mode (see init_pool).
    if force_serial_ or isinstance(__POOL__, int):
        if VERBOSE_PARALLEL or verbose:
            print('[util_parallel.generate] generate_serial')
        return _generate_serial(func, args_list, prog=prog, nTasks=nTasks,
                                freq=freq, **kwargs)
    else:
        if VERBOSE_PARALLEL or verbose:
            print('[util_parallel.generate] generate_parallel')
        return _generate_parallel(func, args_list, ordered=ordered,
                                  chunksize=chunksize, prog=prog,
                                  verbose=verbose, quiet=quiet, nTasks=nTasks,
                                  freq=freq, **kwargs)
def __testwarp(tup):
    """Debug helper reproducing an opencv warpAffine freeze (see generate docs)."""
    # THIS DOES NOT CAUSE A PROBLEM FOR SOME FREAKING REASON
    import cv2
    import numpy as np
    import vtool as vt
    img = tup[0]
    M = vt.rotation_mat3x3(.1)[0:2].dot(vt.translation_mat3x3(-10, 10))
    #new = cv2.warpAffine(img, M[0:2], (500, 500), flags=cv2.INTER_LANCZOS4,
    #                     borderMode=cv2.BORDER_CONSTANT)
    # ONLY FAILS WHEN OUTPUT SIZE IS LARGE
    #dsize = (314, 314)  # (313, 313) does not cause the error
    dsize = (500, 500)  # (313, 313) does not cause the error
    dst = np.empty(dsize[::-1], dtype=img.dtype)
    #new = cv2.warpAffine(img, M[0:2], dsize)
    print('Warping?')
    new = cv2.warpAffine(img, M[0:2], dsize, dst)
    print(dst is new)
    return new


def _test_buffered_generator():
    """
    Test for standard python calls

    CommandLine:
        python -m utool.util_parallel --test-_test_buffered_generator

    Example:
        >>> from utool.util_parallel import *  # NOQA
        >>> _test_buffered_generator()
    """
    import utool as ut
    # ---- Func and Sleep Definitions
    args = [346373]  # 38873
    func = ut.is_prime

    def sleepfunc(prime=args[0]):
        #time.sleep(.1)
        import utool as ut
        [ut.is_prime(prime) for _ in range(2)]
    _test_buffered_generator_general(func, args, sleepfunc, 10.0)


def _test_buffered_generator2():
    """
    Test for numpy calls

    CommandLine:
        python -m utool.util_parallel --test-_test_buffered_generator2

    Looking at about time_thresh=15s or 350 iterations to get buffered over
    serial.

    Example:
        >>> from utool.util_parallel import *  # NOQA
        >>> _test_buffered_generator2()
    """
    import numpy as np
    #import utool as ut
    # ---- Func and Sleep Definitions
    from functools import partial
    rng = np.random.RandomState(0)
    args = [rng.rand(256, 256) for _ in range(32)]  # 38873
    func = partial(np.divide, 4.3)

    def sleepfunc(prime=346373):
        #time.sleep(.1)
        import utool as ut
        [ut.is_prime(prime) for _ in range(2)]
    _test_buffered_generator_general(func, args, sleepfunc, 15.0)


def _test_buffered_generator3():
    """
    Test for disk IO.

    CommandLine:
        python -m utool.util_parallel --test-_test_buffered_generator3

    This test suggests that a ut.buffered_generator is better for disk IO
    than ut.generate

    Example:
        >>> from utool.util_parallel import *  # NOQA
        >>> _test_buffered_generator3()
    """
    import vtool as vt
    import utool as ut
    # ---- Func and Sleep Definitions
    args = list(map(ut.grab_test_imgpath, ut.get_valid_test_imgkeys()))
    func = vt.imread

    def sleepfunc(prime=346373):
        #time.sleep(.1)
        import utool as ut
        [ut.is_prime(prime) for _ in range(2)]
    _test_buffered_generator_general(func, args, sleepfunc, 4.0)


def _test_buffered_generator_general(func, args, sleepfunc,
                                     target_looptime=1.0):
    """
    Benchmark harness: generate func's output in the background while
    sleepfunc runs in the foreground, comparing serial, buffered_generator,
    and generate strategies.

    NOTE(review): ``ut`` is referenced throughout this function but no
    ``import utool as ut`` is visible in its body here -- confirm the
    import was not lost in extraction; as shown this raises NameError.
    """
    serial_cheat = 1  # approx division factor to run serial less times
    show_serial = True  # target_looptime < 10.  # 3.0
    with ut.Timer('One* call to func') as t_func:
        [func(arg) for arg in args]
    functime = t_func.ellapsed / len(args)
    #sleepfunc = ut.is_prime
    with ut.Timer('One call to sleep func') as t_sleep:
        sleepfunc()
    sleeptime = t_sleep.ellapsed
    # compute amount of loops to run
    _num_loops = round(target_looptime // (functime + sleeptime))
    num_data = int(_num_loops // len(args))
    num_loops = int(num_data * len(args))
    serial_cheat = min(serial_cheat, num_data)
    data = ut.flatten([args] * num_data)
    est_tsleep = sleeptime * num_loops
    est_tfunc = functime * num_loops
    print('Estimated stats' + ut.dict_str(ut.dict_subset(locals(), [
        'num_loops', 'functime', 'sleeptime', 'est_tsleep', 'est_tfunc',
        'serial_cheat'])))
    if show_serial:
        with ut.Timer('serial') as t1:
            # cheat for serial to make it go faster
            for x in map(func, data[:len(data) // serial_cheat]):
                sleepfunc()
        t_serial = serial_cheat * t1.ellapsed
        print('...toc(\'adjusted_serial\') = %r' % (t_serial))
    with ut.Timer('ut.buffered_generator') as t2:
        gen_ = ut.buffered_generator(map(func, data), buffer_size=2)
        for x in gen_:
            sleepfunc()
    with ut.Timer('ut.generate') as t3:
        gen_ = ut.generate(func, data, chunksize=2, quiet=1, verbose=0)
        for x in gen_:
            sleepfunc()
    # Compare theoretical vs practical efficiency
    print('\n Theoretical Results')

    def parallel_efficiency(ellapsed, est_tsleep, est_tfunc):
        # 100% means all of est_tfunc was hidden behind the sleep time.
        return (1 - ((ellapsed - est_tsleep) / est_tfunc)) * 100
    if show_serial:
        print('Theoretical gain (serial) = %.3f%%' % (
            parallel_efficiency(t_serial, est_tsleep, est_tfunc),))
    print('Theoretical gain (ut.buffered_generator) = %.3f%%' % (
        parallel_efficiency(t2.ellapsed, est_tsleep, est_tfunc),))
    print('Theoretical gain (ut.generate) = %.2f%%' % (
        parallel_efficiency(t3.ellapsed, est_tsleep, est_tfunc),))
    if show_serial:
        prac_tfunc = t_serial - est_tsleep
        print('\n Practical Results')
        print('Practical gain (serial) = %.3f%%' % (
            parallel_efficiency(t1.ellapsed, est_tsleep, prac_tfunc),))
        print('Practical gain (ut.buffered_generator) = %.3f%%' % (
            parallel_efficiency(t2.ellapsed, est_tsleep, prac_tfunc),))
        print('Practical gain (ut.generate) = %.2f%%' % (
            parallel_efficiency(t3.ellapsed, est_tsleep, prac_tfunc),))
def buffered_generator(source_gen, buffer_size=2):
    r"""
    Generator that runs a slow source generator in a separate thread.

    My generate function still seems faster on test cases.
    However, this function is more flexible in its compatability.

    Args:
        source_gen (iterable): slow generator
        buffer_size (int): the maximal number of items to pre-generate
            (length of the buffer) (default = 2)

    Raises:
        RuntimeError: if buffer_size < 2

    References:
        Taken from Sander Dieleman's data augmentation pipeline
        https://github.com/benanne/kaggle-ndsb/blob/11a66cdbddee16c69514b9530a727df0ac6e136f/buffering.py

    CommandLine:
        python -m utool.util_parallel --test-buffered_generator:0
        python -m utool.util_parallel --test-buffered_generator:1
    """
    if buffer_size < 2:
        raise RuntimeError("Minimal buffer_ size is 2!")
    # A multiprocessing-based variant existed here but froze when fed
    # generators built by six.moves.map; only the threaded path remains.
    _Queue = queue.Queue
    Process = KillableThread
    target = _buffered_generation_thread
    # the effective buffer_ size is one less, because the generation process
    # will generate one extra element and block until there is room in the
    # buffer_.
    buffer_ = _Queue(maxsize=buffer_size - 1)
    # previously None was used as a sentinal, which fails when source_gen
    # genrates None.  Need an object that will never be produced by the
    # source generator itself.
    sentinal = StopIteration  # mildly hacky use of StopIteration exception
    process = Process(
        target=target,
        args=(iter(source_gen), buffer_, sentinal)
    )
    # Daemonize so a still-blocked producer cannot keep the interpreter alive.
    process.daemon = True
    process.start()
    while True:
        output = buffer_.get()
        if output is sentinal:
            # BUGFIX: the original did `raise StopIteration` here, which is a
            # RuntimeError inside a generator under PEP 479 (Python 3.7+).
            # A plain return terminates the generator correctly everywhere.
            return
        yield output
def _buffered_generation_thread(source_gen, buffer_, sentinal): """ helper for buffered_generator """ for data in source_gen: buffer_.put(data, block=True) # sentinel: signal the end of the iterator buffer_.put(sentinal) #def _buffered_generation_process(source_gen, buffer_, sentinal): # """ helper for buffered_generator """ # for data in source_gen: # buffer_.put(data, block=True) # # sentinel: signal the end of the iterator # buffer_.put(sentinal) # # unfortunately this does not suffice as a signal: if buffer_.get() was # # called and subsequently the buffer_ is closed, it will block forever. # buffer_.close()
def process(func, args_list, args_dict={}, force_serial=None, nTasks=None,
            quiet=QUIET):
    """
    Use ut.generate rather than ut.process

    Args:
        func (func):
        args_list (list or iter): tuples of positional args for each call
        args_dict (dict): keyword args shared by every call (read-only
            mutable default; never mutated here)
        force_serial (bool): defaults to the module-wide __FORCE_SERIAL__
        nTasks (int): required when args_list is an iterator
        quiet (bool):

    Returns:
        result of parallel map(func, args_list)

    CommandLine:
        python -m utool.util_parallel --test-process

    Example:
        >>> # SLOW_DOCTEST
        >>> import utool as ut
        >>> num = 8700  # parallel is slower for smaller numbers
        >>> flag_generator0 = ut.process(ut.is_prime, list(zip(range(0, num))), force_serial=True)
        >>> flag_list0 = list(flag_generator0)
        >>> flag_generator1 = ut.process(ut.is_prime, list(zip(range(0, num))), force_serial=False)
        >>> flag_list1 = list(flag_generator1)
        >>> assert flag_list0 == flag_list1
    """
    if force_serial is None:
        force_serial = __FORCE_SERIAL__
    if USE_GLOBAL_POOL:
        ensure_pool(quiet=quiet)
    if nTasks is None:
        nTasks = len(args_list)
    # __POOL__ == 1 means the global pool was initialized in serial mode.
    if __POOL__ == 1 or force_serial:
        if not QUIET:
            print('[util_parallel] executing %d %s tasks in serial' %
                  (nTasks, get_funcname(func)))
        result_list = _process_serial(func, args_list, args_dict,
                                      nTasks=nTasks, quiet=quiet)
    else:
        if __POOL__ is None:
            # No global pool: build a private one for this call.
            pool = new_pool(num_procs=get_default_numprocs(),
                            init_worker=init_worker,
                            maxtasksperchild=None)
        else:
            pool = __POOL__
        if not QUIET:
            print('[util_parallel] executing %d %s tasks using %d processes' %
                  (nTasks, get_funcname(func), pool._processes))
        result_list = _process_parallel(func, args_list, args_dict,
                                        nTasks=nTasks, quiet=quiet, pool=pool)
    return result_list
def spawn_background_process(func, *args, **kwargs):
    """
    Run a function in the background
    (like rebuilding some costly data structure)

    Args:
        func (function): target callable; *args/**kwargs are forwarded

    Returns:
        KillableProcess: the started (already running) process handle

    References:
        http://stackoverflow.com/questions/2046603/is-it-possible-to-run-function-in-a-subprocess-without-threading-or-writing-a-se
        http://stackoverflow.com/questions/1196074/starting-a-background-process-in-python
        http://stackoverflow.com/questions/15063963/python-is-thread-still-running

    CommandLine:
        python -m utool.util_parallel --test-spawn_background_process

    Example:
        >>> # SLOW_DOCTEST
        >>> from utool.util_parallel import *  # NOQA
        >>> import utool as ut
        >>> import time
        >>> from os.path import join
        >>> # build test data
        >>> fname = 'test_bgfunc_output.txt'
        >>> dpath = ut.get_app_resource_dir('utool')
        >>> ut.ensuredir(dpath)
        >>> fpath = join(dpath, fname)
        >>> # ensure file is not around
        >>> sleep_time = 1
        >>> ut.delete(fpath)
        >>> assert not ut.checkpath(fpath, verbose=True)
        >>> def backgrond_func(fpath, sleep_time):
        ...     import utool as ut
        ...     import time
        ...     print('[BG] Background Process has started')
        ...     time.sleep(sleep_time)
        ...     print('[BG] Background Process is writing')
        ...     ut.write_to(fpath, 'background process')
        ...     print('[BG] Background Process has finished')
        >>> # execute function
        >>> func = backgrond_func
        >>> args = (fpath, sleep_time)
        >>> kwargs = {}
        >>> print('[FG] Spawning process')
        >>> threadid = ut.spawn_background_process(func, *args, **kwargs)
        >>> assert threadid.is_alive() is True, 'thread should be active'
        >>> print('[FG] Spawned process. threadid=%r' % (threadid,))
        >>> # background process should not have finished yet
        >>> assert not ut.checkpath(fpath, verbose=True)
        >>> print('[FG] Waiting to check')
        >>> time.sleep(sleep_time + .1)
        >>> print('[FG] Finished waiting')
        >>> # Now the file should be there
        >>> assert ut.checkpath(fpath, verbose=True)
        >>> assert threadid.is_alive() is False, 'process should have died'
    """
    import utool as ut
    func_name = ut.get_funcname(func)
    # Name the process after the target function for easier debugging.
    name = 'mp.Progress-' + func_name
    #proc_obj = multiprocessing.Process(target=func, name=name, args=args, kwargs=kwargs)
    proc_obj = KillableProcess(target=func, name=name, args=args, kwargs=kwargs)
    #proc_obj.daemon = True
    #proc_obj.isAlive = proc_obj.is_alive
    proc_obj.start()
    return proc_obj
class KillableProcess(multiprocessing.Process):
    """
    Simple subclass of multiprocessing.Process

    Gives an additional method (terminate2) to kill all children as well as
    itself.  (A __del__ hook that would call it automatically is commented
    out below.)
    """
    #def __del__(self):
    #    self.terminate2()
    #    super(KillableProcess, self).__del__()

    def terminate2(self):
        """Terminate this process and all of its child processes."""
        if self.is_alive():
            #print('[terminate2] Killing process')
            # Kill all children
            # NOTE: requires the third-party psutil package.
            import psutil
            webproc = psutil.Process(pid=self.pid)
            child_proces = webproc.children()
            [x.terminate() for x in child_proces]
            self.terminate()
        else:
            #print('[terminate2] Already dead')
            pass

#def _process_error_wraper(queue, func, args, kwargs):
#    pass

#def spawn_background_process2(func, *args, **kwargs):
#    multiprocessing_queue
#    import utool as ut
#    func_name = ut.get_funcname(func)
#    name = 'mp.Progress-' + func_name
#    proc_obj = multiprocessing.Process(target=func, name=name, args=args, kwargs=kwargs)
#    #proc_obj.isAlive = proc_obj.is_alive
#    proc_obj.start()
def _async_raise(tid, excobj): res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(excobj)) if res == 0: raise ValueError('nonexistent thread id') elif res > 1: # """if it returns a number greater than one, you're in trouble, # and you should call it again with exc=NULL to revert the effect""" ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, 0) raise SystemError('PyThreadState_SetAsyncExc failed')
class KillableThread(threading.Thread):
    """
    Thread subclass that can have an exception (e.g. SystemExit) raised
    asynchronously inside it, giving a best-effort terminate().

    References:
        http://code.activestate.com/recipes/496960-thread2-killable-threads/
        http://tomerfiliba.com/recipes/Thread2/
    """

    def raise_exc(self, excobj):
        """Raise the exception type ``excobj`` inside this running thread."""
        # BUGFIX: Thread.isAlive() was removed in Python 3.9; the modern
        # spelling is_alive() exists on all supported versions.
        assert self.is_alive(), 'thread must be started'
        for tid, tobj in threading._active.items():
            if tobj is self:
                _async_raise(tid, excobj)
                return
        # the thread was alive when we entered the loop, but was not found
        # in the dict, hence it must have been already terminated. should we
        # raise an exception here? silently ignore?

    def terminate(self):
        """Ask this thread to exit by injecting SystemExit; no-op if the
        thread has already finished."""
        # must raise the SystemExit type, instead of a SystemExit() instance
        # due to a bug in PyThreadState_SetAsyncExc
        try:
            self.raise_exc(SystemExit)
        except ValueError:
            # Thread already terminated between the liveness check and the
            # async raise -- nothing to do.
            pass
def spawn_background_thread(func, *args, **kwargs):
    """Start ``func(*args, **kwargs)`` on a new KillableThread; return it."""
    #threadobj = IMPLEMENTATION_NUM
    bg_thread = KillableThread(target=func, args=args, kwargs=kwargs)
    bg_thread.start()
    return bg_thread
def spawn_background_daemon_thread(func, *args, **kwargs):
    """Like spawn_background_thread, but the thread is daemonized so it
    cannot keep the interpreter alive at shutdown."""
    #threadobj = IMPLEMENTATION_NUM
    bg_thread = KillableThread(target=func, args=args, kwargs=kwargs)
    bg_thread.daemon = True
    bg_thread.start()
    return bg_thread
def _spawn_background_thread0(func, *args, **kwargs): thread_id = _thread.start_new_thread(func, args, kwargs) return thread_id if __name__ == '__main__': """ Ignore: timing things python reset_dbs.py --time-generate python reset_dbs.py --time-generate --force-serial python reset_dbs.py --time-generate --preinit python reset_dbs.py --time-generate --force-serial CommandLine: python -m utool.util_parallel python -m utool.util_parallel --allexamples --testslow coverage run -m utool.util_parallel --allexamples coverage run -m utool.util_parallel --allexamples --testslow coverage report html -m utool/util_parallel.py coverage html """ #import multiprocessing multiprocessing.freeze_support() # for win32 import utool as ut # NOQA ut.doctest_funcs()