flextream.batch_handler
Functions and classes related to batching, and shipping data out to event hubs
1""" 2Functions and classes related to batching, and shipping data out to event hubs 3""" 4 5import threading 6import json 7from dataclasses import dataclass, field 8from typing import Any 9 10from azure.eventhub import EventHubProducerClient, EventDataBatch, EventData 11from azure.identity import DefaultAzureCredential 12 13 14def send_to_eventhub( 15 message: dict | str | bytes, 16 namespace: str, 17 eventhub: str, 18 latency: int = 30, 19 credential: Any = None, 20) -> None: 21 """ 22 Create/load BatchHandler, and send message to eventhub. 23 24 To avoid bottlenecks, messages will be batched up 25 and sent in background. `latency` keyword defines the maximum number of 26 seconds a message will be held onto before sending to eventhubs. 27 28 Having a latency of 0 will mean that messages are immediately sent in function call, 29 but this behaviour is not recommended for performance reasons. 30 31 Note, namespace should be fully qualified of form 32 "namespace-name.servicebus.windows.net". 33 34 Optional keyword `credential` can be used to pass in an `azure.identity` 35 credential object. Otherwise, will default to `DefaultAzureCredential`. 36 """ 37 BatchHandler.from_namespace( 38 namespace, 39 eventhub, 40 latency=latency, 41 credential=credential, 42 ).append(message) 43 44 45_batch_handler_cache: dict[tuple[str, str], "BatchHandler"] = {} 46 47 48def _cachable_batch_handler_factor( 49 namespace: str, 50 eventhub: str, 51 latency: int = 30, 52 credential: Any = None, 53) -> "BatchHandler": 54 """ 55 Private module function to handle caching of BatchHandler objects 56 based on namespace and eventhub. 
57 """ 58 existing: BatchHandler | None = _batch_handler_cache.get((namespace, eventhub)) 59 if existing: 60 existing.latency = latency 61 return existing 62 client: EventHubProducerClient = EventHubProducerClient( 63 fully_qualified_namespace=namespace, 64 eventhub_name=eventhub, 65 credential=credential or DefaultAzureCredential(), 66 ) 67 batch: EventDataBatch = client.create_batch() 68 handler = BatchHandler( 69 client=client, 70 batch=batch, 71 latency=latency, 72 ) 73 _batch_handler_cache[(namespace, eventhub)] = handler 74 return handler 75 76 77@dataclass 78class BatchHandler: 79 """ 80 Class to handle appending to, and building up of batches for efficient event 81 hub use. 82 83 Note: Uses threading locks to avoid race conditions, which will *only* hold 84 if called using threading executors rather than asyncio. 85 """ 86 87 client: EventHubProducerClient 88 batch: EventDataBatch 89 latency: int | float 90 _lock: threading.Lock = field(default_factory=threading.Lock) 91 _waiting: bool = False 92 _timer: threading.Timer | None = None 93 94 @classmethod 95 def from_namespace( 96 cls, 97 namespace: str, 98 eventhub: str, 99 latency: int | float = 30, 100 credential: Any = None, 101 ) -> "BatchHandler": 102 """ 103 Class method to create a batch handler object from a given azure namespace 104 and eventhub. 105 """ 106 return _cachable_batch_handler_factor( 107 namespace=namespace, 108 eventhub=eventhub, 109 latency=latency, 110 credential=credential, 111 ) 112 113 def _send_and_flush(self) -> None: 114 """ 115 Send batch, and replace with new empty batch 116 """ 117 with self._lock: 118 self._waiting = False 119 if self.batch.size_in_bytes <= 0: 120 return # we'll exit out if no data 121 with self.client: 122 self.client.send_batch(self.batch) 123 self.batch = self.client.create_batch() 124 125 def append(self, msg: str | bytes | dict) -> None: 126 """ 127 Append a message onto the batch, sending only if necessary to make space. 
128 """ 129 if isinstance(msg, dict): 130 msg = json.dumps(msg) 131 with self._lock: 132 try: 133 self.batch.add(EventData(msg)) 134 except ValueError: # batch is at max capacity 135 self._send_and_flush() 136 self.batch.add(EventData(msg)) 137 if not self._waiting: 138 self._timer = threading.Timer(self.latency, self._send_and_flush) 139 self._timer.daemon = True 140 self._timer.start() 141 self._waiting = True
def send_to_eventhub(
    message: dict | str | bytes,
    namespace: str,
    eventhub: str,
    latency: int = 30,
    credential: Any = None,
) -> None:
    """
    Create/load BatchHandler, and send message to eventhub.

    To avoid bottlenecks, messages will be batched up
    and sent in background. `latency` keyword defines the maximum number of
    seconds a message will be held onto before sending to eventhubs.

    Having a latency of 0 will mean that messages are immediately sent in function call,
    but this behaviour is not recommended for performance reasons.

    Note, namespace should be fully qualified of form
    "namespace-name.servicebus.windows.net".

    Optional keyword `credential` can be used to pass in an `azure.identity`
    credential object. Otherwise, will default to `DefaultAzureCredential`.
    """
    # Delegate to the cached handler for this namespace/eventhub pair.
    handler = BatchHandler.from_namespace(
        namespace,
        eventhub,
        latency=latency,
        credential=credential,
    )
    handler.append(message)
Create/load BatchHandler, and send message to eventhub.
To avoid bottlenecks, messages will be batched up
and sent in the background. The `latency` keyword defines the maximum number of
seconds a message will be held onto before sending to Event Hubs.
Having a latency of 0 will mean that messages are immediately sent in function call, but this behaviour is not recommended for performance reasons.
Note, namespace should be fully qualified of form "namespace-name.servicebus.windows.net".
Optional keyword `credential` can be used to pass in an `azure.identity`
credential object. Otherwise, it will default to `DefaultAzureCredential`.
@dataclass
class BatchHandler:
    """
    Class to handle appending to, and building up of batches for efficient event
    hub use.

    Note: Uses threading locks to avoid race conditions, which will *only* hold
    if called using threading executors rather than asyncio.
    """

    client: EventHubProducerClient
    batch: EventDataBatch
    latency: int | float
    # Plain (non-reentrant) lock: internal code must never re-acquire it.
    _lock: threading.Lock = field(default_factory=threading.Lock)
    # True while a background flush timer is pending.
    _waiting: bool = False
    _timer: threading.Timer | None = None

    @classmethod
    def from_namespace(
        cls,
        namespace: str,
        eventhub: str,
        latency: int | float = 30,
        credential: Any = None,
    ) -> "BatchHandler":
        """
        Class method to create a batch handler object from a given azure namespace
        and eventhub.
        """
        return _cachable_batch_handler_factor(
            namespace=namespace,
            eventhub=eventhub,
            latency=latency,
            credential=credential,
        )

    def _flush_locked(self) -> None:
        """
        Send the current batch and replace it with a fresh one.

        Caller MUST already hold ``self._lock``.
        """
        if self._timer is not None:
            # Cancel any pending timer so a capacity-triggered flush doesn't
            # leave a stale timer around to flush again redundantly.
            self._timer.cancel()
            self._timer = None
        self._waiting = False
        if self.batch.size_in_bytes <= 0:
            return  # we'll exit out if no data
        # BUGFIX: do NOT wrap this in `with self.client:` — the context
        # manager closes the producer on exit, and this client is cached and
        # reused, so every send after the first flush would fail.
        self.client.send_batch(self.batch)
        self.batch = self.client.create_batch()

    def _send_and_flush(self) -> None:
        """
        Thread-safe flush: send batch, and replace with new empty batch.
        Used as the timer callback.
        """
        with self._lock:
            self._flush_locked()

    def append(self, msg: str | bytes | dict) -> None:
        """
        Append a message onto the batch, sending only if necessary to make space.
        """
        if isinstance(msg, dict):
            msg = json.dumps(msg)
        with self._lock:
            try:
                self.batch.add(EventData(msg))
            except ValueError:  # batch is at max capacity
                # BUGFIX: call the lock-free flush. Calling _send_and_flush()
                # here re-acquires the non-reentrant lock we already hold — a
                # guaranteed deadlock in the original code.
                self._flush_locked()
                self.batch.add(EventData(msg))
            if not self._waiting:
                self._timer = threading.Timer(self.latency, self._send_and_flush)
                self._timer.daemon = True
                self._timer.start()
                self._waiting = True
Class to handle appending to, and building up of batches for efficient event hub use.
Note: Uses threading locks to avoid race conditions, which will only hold if called using threading executors rather than asyncio.
95 @classmethod 96 def from_namespace( 97 cls, 98 namespace: str, 99 eventhub: str, 100 latency: int | float = 30, 101 credential: Any = None, 102 ) -> "BatchHandler": 103 """ 104 Class method to create a batch handler object from a given azure namespace 105 and eventhub. 106 """ 107 return _cachable_batch_handler_factor( 108 namespace=namespace, 109 eventhub=eventhub, 110 latency=latency, 111 credential=credential, 112 )
Class method to create a batch handler object from a given azure namespace and eventhub.
126 def append(self, msg: str | bytes | dict) -> None: 127 """ 128 Append a message onto the batch, sending only if necessary to make space. 129 """ 130 if isinstance(msg, dict): 131 msg = json.dumps(msg) 132 with self._lock: 133 try: 134 self.batch.add(EventData(msg)) 135 except ValueError: # batch is at max capacity 136 self._send_and_flush() 137 self.batch.add(EventData(msg)) 138 if not self._waiting: 139 self._timer = threading.Timer(self.latency, self._send_and_flush) 140 self._timer.daemon = True 141 self._timer.start() 142 self._waiting = True
Append a message onto the batch, sending only if necessary to make space.