flextream.batch_handler

Functions and classes related to batching, and shipping data out to event hubs

  1"""
  2Functions and classes related to batching, and shipping data out to event hubs
  3"""
  4
  5import threading
  6import json
  7from dataclasses import dataclass, field
  8from typing import Any
  9
 10from azure.eventhub import EventHubProducerClient, EventDataBatch, EventData
 11from azure.identity import DefaultAzureCredential
 12
 13
 14def send_to_eventhub(
 15    message: dict | str | bytes,
 16    namespace: str,
 17    eventhub: str,
 18    latency: int = 30,
 19    credential: Any = None,
 20) -> None:
 21    """
 22    Create/load BatchHandler, and send message to eventhub.
 23
 24    To avoid bottlenecks, messages will be batched up
 25    and sent in background. `latency` keyword defines the maximum number of
 26    seconds a message will be held onto before sending to eventhubs.
 27
 28    Having a latency of 0 will mean that messages are immediately sent in function call,
 29    but this behaviour is not recommended for performance reasons.
 30
 31    Note, namespace should be fully qualified of form
 32    "namespace-name.servicebus.windows.net".
 33
 34    Optional keyword `credential` can be used to pass in an `azure.identity`
 35    credential object. Otherwise, will default to `DefaultAzureCredential`.
 36    """
 37    BatchHandler.from_namespace(
 38        namespace,
 39        eventhub,
 40        latency=latency,
 41        credential=credential,
 42    ).append(message)
 43
 44
 45_batch_handler_cache: dict[tuple[str, str], "BatchHandler"] = {}
 46
 47
 48def _cachable_batch_handler_factor(
 49    namespace: str,
 50    eventhub: str,
 51    latency: int = 30,
 52    credential: Any = None,
 53) -> "BatchHandler":
 54    """
 55    Private module function to handle caching of BatchHandler objects
 56    based on namespace and eventhub.
 57    """
 58    existing: BatchHandler | None = _batch_handler_cache.get((namespace, eventhub))
 59    if existing:
 60        existing.latency = latency
 61        return existing
 62    client: EventHubProducerClient = EventHubProducerClient(
 63        fully_qualified_namespace=namespace,
 64        eventhub_name=eventhub,
 65        credential=credential or DefaultAzureCredential(),
 66    )
 67    batch: EventDataBatch = client.create_batch()
 68    handler = BatchHandler(
 69        client=client,
 70        batch=batch,
 71        latency=latency,
 72    )
 73    _batch_handler_cache[(namespace, eventhub)] = handler
 74    return handler
 75
 76
 77@dataclass
 78class BatchHandler:
 79    """
 80    Class to handle appending to, and building up of batches for efficient event
 81    hub use.
 82
 83    Note: Uses threading locks to avoid race conditions, which will *only* hold
 84    if called using threading executors rather than asyncio.
 85    """
 86
 87    client: EventHubProducerClient
 88    batch: EventDataBatch
 89    latency: int | float
 90    _lock: threading.Lock = field(default_factory=threading.Lock)
 91    _waiting: bool = False
 92    _timer: threading.Timer | None = None
 93
 94    @classmethod
 95    def from_namespace(
 96        cls,
 97        namespace: str,
 98        eventhub: str,
 99        latency: int | float = 30,
100        credential: Any = None,
101    ) -> "BatchHandler":
102        """
103        Class method to create a batch handler object from a given azure namespace
104        and eventhub.
105        """
106        return _cachable_batch_handler_factor(
107            namespace=namespace,
108            eventhub=eventhub,
109            latency=latency,
110            credential=credential,
111        )
112
113    def _send_and_flush(self) -> None:
114        """
115        Send batch, and replace with new empty batch
116        """
117        with self._lock:
118            self._waiting = False
119            if self.batch.size_in_bytes <= 0:
120                return  # we'll exit out if no data
121            with self.client:
122                self.client.send_batch(self.batch)
123            self.batch = self.client.create_batch()
124
125    def append(self, msg: str | bytes | dict) -> None:
126        """
127        Append a message onto the batch, sending only if necessary to make space.
128        """
129        if isinstance(msg, dict):
130            msg = json.dumps(msg)
131        with self._lock:
132            try:
133                self.batch.add(EventData(msg))
134            except ValueError:  # batch is at max capacity
135                self._send_and_flush()
136                self.batch.add(EventData(msg))
137            if not self._waiting:
138                self._timer = threading.Timer(self.latency, self._send_and_flush)
139                self._timer.daemon = True
140                self._timer.start()
141                self._waiting = True
def send_to_eventhub( message: dict | str | bytes, namespace: str, eventhub: str, latency: int = 30, credential: Any = None) -> None:
15def send_to_eventhub(
16    message: dict | str | bytes,
17    namespace: str,
18    eventhub: str,
19    latency: int = 30,
20    credential: Any = None,
21) -> None:
22    """
23    Create/load BatchHandler, and send message to eventhub.
24
25    To avoid bottlenecks, messages will be batched up
26    and sent in background. `latency` keyword defines the maximum number of
27    seconds a message will be held onto before sending to eventhubs.
28
29    Having a latency of 0 will mean that messages are immediately sent in function call,
30    but this behaviour is not recommended for performance reasons.
31
32    Note, namespace should be fully qualified of form
33    "namespace-name.servicebus.windows.net".
34
35    Optional keyword `credential` can be used to pass in an `azure.identity`
36    credential object. Otherwise, will default to `DefaultAzureCredential`.
37    """
38    BatchHandler.from_namespace(
39        namespace,
40        eventhub,
41        latency=latency,
42        credential=credential,
43    ).append(message)

Create/load BatchHandler, and send message to eventhub.

To avoid bottlenecks, messages will be batched up and sent in background. latency keyword defines the maximum number of seconds a message will be held onto before sending to eventhubs.

Having a latency of 0 will mean that messages are immediately sent in function call, but this behaviour is not recommended for performance reasons.

Note, namespace should be fully qualified of form "namespace-name.servicebus.windows.net".

Optional keyword credential can be used to pass in an azure.identity credential object. Otherwise, will default to DefaultAzureCredential.

@dataclass
class BatchHandler:
 78@dataclass
 79class BatchHandler:
 80    """
 81    Class to handle appending to, and building up of batches for efficient event
 82    hub use.
 83
 84    Note: Uses threading locks to avoid race conditions, which will *only* hold
 85    if called using threading executors rather than asyncio.
 86    """
 87
 88    client: EventHubProducerClient
 89    batch: EventDataBatch
 90    latency: int | float
 91    _lock: threading.Lock = field(default_factory=threading.Lock)
 92    _waiting: bool = False
 93    _timer: threading.Timer | None = None
 94
 95    @classmethod
 96    def from_namespace(
 97        cls,
 98        namespace: str,
 99        eventhub: str,
100        latency: int | float = 30,
101        credential: Any = None,
102    ) -> "BatchHandler":
103        """
104        Class method to create a batch handler object from a given azure namespace
105        and eventhub.
106        """
107        return _cachable_batch_handler_factor(
108            namespace=namespace,
109            eventhub=eventhub,
110            latency=latency,
111            credential=credential,
112        )
113
114    def _send_and_flush(self) -> None:
115        """
116        Send batch, and replace with new empty batch
117        """
118        with self._lock:
119            self._waiting = False
120            if self.batch.size_in_bytes <= 0:
121                return  # we'll exit out if no data
122            with self.client:
123                self.client.send_batch(self.batch)
124            self.batch = self.client.create_batch()
125
126    def append(self, msg: str | bytes | dict) -> None:
127        """
128        Append a message onto the batch, sending only if necessary to make space.
129        """
130        if isinstance(msg, dict):
131            msg = json.dumps(msg)
132        with self._lock:
133            try:
134                self.batch.add(EventData(msg))
135            except ValueError:  # batch is at max capacity
136                self._send_and_flush()
137                self.batch.add(EventData(msg))
138            if not self._waiting:
139                self._timer = threading.Timer(self.latency, self._send_and_flush)
140                self._timer.daemon = True
141                self._timer.start()
142                self._waiting = True

Class to handle appending to, and building up of batches for efficient event hub use.

Note: Uses threading locks to avoid race conditions, which will only hold if called using threading executors rather than asyncio.

BatchHandler( client: azure.eventhub._producer_client.EventHubProducerClient, batch: azure.eventhub._common.EventDataBatch, latency: int | float, _lock: threading.Lock = <factory>, _waiting: bool = False, _timer: threading.Timer | None = None)
client: azure.eventhub._producer_client.EventHubProducerClient
batch: azure.eventhub._common.EventDataBatch
latency: int | float
@classmethod
def from_namespace( cls, namespace: str, eventhub: str, latency: int | float = 30, credential: Any = None) -> BatchHandler:
 95    @classmethod
 96    def from_namespace(
 97        cls,
 98        namespace: str,
 99        eventhub: str,
100        latency: int | float = 30,
101        credential: Any = None,
102    ) -> "BatchHandler":
103        """
104        Class method to create a batch handler object from a given azure namespace
105        and eventhub.
106        """
107        return _cachable_batch_handler_factor(
108            namespace=namespace,
109            eventhub=eventhub,
110            latency=latency,
111            credential=credential,
112        )

Class method to create a batch handler object from a given azure namespace and eventhub.

def append(self, msg: str | bytes | dict) -> None:
126    def append(self, msg: str | bytes | dict) -> None:
127        """
128        Append a message onto the batch, sending only if necessary to make space.
129        """
130        if isinstance(msg, dict):
131            msg = json.dumps(msg)
132        with self._lock:
133            try:
134                self.batch.add(EventData(msg))
135            except ValueError:  # batch is at max capacity
136                self._send_and_flush()
137                self.batch.add(EventData(msg))
138            if not self._waiting:
139                self._timer = threading.Timer(self.latency, self._send_and_flush)
140                self._timer.daemon = True
141                self._timer.start()
142                self._waiting = True

Append a message onto the batch, sending only if necessary to make space.