Source code for summer.pcg

# Copyright (C) 2009-2020 Martin Slouf <martinslouf@users.sourceforge.net>
#
# This file is a part of Summer.
#
# Summer is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""``pcg`` module is more specific producer--consumer implementation based
on a common use case: you need to iterate in parallel over a collection of
input values and invoke an operation for each item.

Typical usage::

    class MyConsumer(Consumer):

        def __init__(self, progress: Progress):
            self.progress = progress

        def consume(self, produced_object):
            # do whatever is required to do
            self...
            # indicate progress -- for example to some gui listener (progressbar, ...)
            self.progress.next_step()

    if __name__ == "__main__":
        iterable = list(...)
        consumer = MyConsumer(progress)
        pcg = ProducerConsumerWithGenerator(iterable, ProducerWithGenerator(), consumer)
        pcg.run()

Producer is replaced with :py:class:`ProducerWithGenerator`, which can
usually be left as is -- it automatically iterates over the provided
iterable, returning one value at a time.  You can override the
:py:meth:`ProducerWithGenerator.produce_from_slice` method, which takes a
single argument -- the current iterator value.

You can also leverage the :py:func:`summer.utils.chunks` function to split
a large collection into smaller ones and produce chunks of data to reduce
contention when iterating over a single shared iterator -- the
:py:class:`summer.pc.Consumer` class then consumes whole chunks, not single
items, which may improve performance.

"""

import collections
import collections.abc
import logging
import threading

from .ex import UnsupportedMethodException
from .pc import (
    Producer,
    Consumer,
    ProducerThread,
    ProducerConsumer,
)

logger = logging.getLogger(__name__)


class ThreadSafeIterator(collections.abc.Iterator):
    """Implements thread safe iteration over an iterable.

    A single iterator is obtained from the wrapped iterable at construction
    time; every ``next()`` call on this object advances that one iterator
    while holding a lock, so several threads can pull items from the same
    instance without two of them advancing the iterator at the same time.

    ``__iter__`` is inherited from :py:class:`collections.abc.Iterator` and
    returns the instance itself.
    """

    def __init__(self, iterable: collections.abc.Iterable):
        """Wrap *iterable* for synchronized iteration.

        Args:

            iterable (collections.abc.Iterable): source of the values; it is
                                                 turned into an iterator
                                                 exactly once, here.
        """
        # NOTE: the ABCs live in ``collections.abc``; the old aliases
        # ``collections.Iterator`` / ``collections.Iterable`` were removed in
        # Python 3.10 and raise AttributeError there.
        self.iterable = iterable
        self.lock = threading.Lock()
        self.iterator = iter(self.iterable)

    def __next__(self):
        """Return the next value of the wrapped iterator.

        Returns:

            object: next item produced by the underlying iterator

        Raises:

            StopIteration: propagated from the underlying iterator once it is
                           exhausted (the lock is released by the ``with``
                           block in that case as well).
        """
        with self.lock:
            return next(self.iterator)
class ProducerWithGenerator(Producer):
    """Producer flavour used by the :py:class:`ProducerConsumerWithGenerator`
    engine.

    Unlike a plain :py:class:`summer.pc.Producer`, it is driven through
    :py:meth:`produce_from_slice`, which receives one item of the iteration
    at a time; the argument-less :py:meth:`produce` is disabled.
    """

    def produce(self):
        """Not supported -- always raises.

        Raises:

            UnsupportedMethodException: on every call; use
                                        :py:meth:`produce_from_slice` instead.
        """
        raise UnsupportedMethodException(
            "use produce(generator_slice) instead of produce()")

    def produce_from_slice(self, generator_slice: object) -> object:
        """Turn one item of the iteration into a produced object.

        The default implementation is the identity function, which is what
        you want when the goal is simply to iterate over the provided
        iterable in parallel.  Override it to transform the item.

        Args:

            generator_slice: single item from iteration, whatever it may be

        Returns:

            object: the produced object; by default the very item that was
                    passed in.
        """
        return generator_slice
class ProducerConsumerWithGenerator(ProducerConsumer):
    """Specific implementation of :py:class:`summer.pc.ProducerConsumer` that
    adds thread-safe iteration over provided *iterable* object passing single
    values to :py:class:`ProducerWithGenerator` instances one at a time.
    """

    def __init__(self,
                 iterable: collections.abc.Iterable,
                 producer: ProducerWithGenerator,
                 consumer: Consumer,
                 producer_thread_count=ProducerConsumer.DEFAULT_THREAD_COUNT,
                 consumer_thread_count=ProducerConsumer.DEFAULT_THREAD_COUNT):
        """Creates :py:class:`ProducerConsumerWithGenerator` instance.

        Args:

            iterable (collections.abc.Iterable): iterable over input values

            producer (ProducerWithGenerator): producer instance supplied by
                                              caller

            consumer (Consumer): consumer instance supplied by caller

            producer_thread_count (int): number of producer threads

            consumer_thread_count (int): number of consumer threads
        """
        # NOTE: annotation uses ``collections.abc.Iterable``; the alias
        # ``collections.Iterable`` was removed in Python 3.10 and, because
        # annotations are evaluated when the ``def`` executes, would break
        # the import of this module.
        ProducerConsumer.__init__(self,
                                  producer,
                                  consumer,
                                  producer_thread_count,
                                  consumer_thread_count)
        self.iterable = iterable

    def _start_producer_threads(self):
        """Create and start the producer threads.

        All threads share one :py:class:`ThreadSafeIterator` built over
        ``self.iterable``, so each input value is handed to exactly one
        thread.  ``self.producer`` and ``self.producer_thread_count`` are
        presumably set by :py:class:`summer.pc.ProducerConsumer` -- confirm
        against that class.

        Returns:

            list: the started :py:class:`ProducerThreadWithGenerator`
                  instances, in start order.
        """
        generator = ThreadSafeIterator(self.iterable)
        threads = []
        for _ in range(self.producer_thread_count):
            thread = ProducerThreadWithGenerator(self, self.producer, generator)
            thread.start()
            threads.append(thread)
        return threads
class ProducerThreadWithGenerator(ProducerThread):
    """Thread that drives a :py:class:`ProducerWithGenerator` instance,
    feeding it values pulled from a shared :py:class:`ThreadSafeIterator`.
    """

    def __init__(self,
                 producer_consumer: ProducerConsumerWithGenerator,
                 producer: ProducerWithGenerator,
                 generator: ThreadSafeIterator):
        """Creates the thread.

        Args:

            producer_consumer (ProducerConsumerWithGenerator): engine that is
                notified about every produced object

            producer (ProducerWithGenerator): producer whose
                ``produce_from_slice`` is invoked for each value

            generator (ThreadSafeIterator): source of the input values
        """
        ProducerThread.__init__(self, producer_consumer, producer)
        self.generator = generator

    def run(self):
        """Pull values from the generator until it is exhausted, hand each
        produced object to the engine, then signal end of production.
        """
        # ``map`` is lazy, so each value is produced right before it is
        # reported -- same interleaving as an explicit loop body.
        for produced in map(self.producer.produce_from_slice, self.generator):
            self.producer_consumer.object_produced(produced)
        # Sentinel telling the engine this producer thread has finished.
        self.producer_consumer.object_produced(Producer.END_OF_PRODUCTION)