Coverage for /Users/eugene/Development/legion-utils/legion_utils/princeps.py: 0%
95 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-01 20:15 -0400
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-01 20:15 -0400
1from concurrent import futures
2from contextlib import contextmanager
3from dataclasses import dataclass
4from multiprocessing import Process, Event
5from threading import get_ident
6from time import time, sleep
7from typing import Dict, Iterable, Optional, ContextManager
9import grpc
10from tenacity import retry, wait_exponential, stop_after_attempt, wait_random
12from checkin_pb2 import CheckInRequest
13from checkin_pb2_grpc import PrincepsServicer, add_PrincepsServicer_to_server, PrincepsStub
14from legion_utils import log
15from legion_utils.utils import ReadWriteLock
# Default gRPC address (a Unix domain socket) used by both the check-in client
# (`princeps_checkin`) and the server (`_grpc_serve`) defined in this module.
UNIX_SOCKET = "unix:///tmp/princeps.sock"
@dataclass(frozen=True)
class CheckInAlertParams:
    """Deadline and alert-escalation settings recorded for one service check-in.

    ``next_checkin_before`` is compared directly against ``time()``, i.e. it
    is an absolute Unix timestamp in seconds. Each ``*_after`` value is an
    offset added to that deadline; a negative offset disables that severity.
    """

    next_checkin_before: int
    warn_after: int
    error_after: int
    critical_after: int
    alert_ttl: int  # copied from the request; not read by any method of this class

    @classmethod
    def of(cls, request: 'CheckInRequest') -> 'CheckInAlertParams':
        """Build an instance from the same-named fields of a check-in request.

        Implemented as a classmethod so that a subclass calling ``of`` gets an
        instance of itself rather than of the base class.
        """
        return cls(next_checkin_before=request.next_checkin_before,
                   warn_after=request.warn_after,
                   error_after=request.error_after,
                   critical_after=request.critical_after,
                   alert_ttl=request.alert_ttl)

    def _overdue_by(self, offset: int) -> bool:
        """Return True when ``offset`` is enabled (>= 0) and now is past deadline + offset."""
        return offset >= 0 and time() >= (self.next_checkin_before + offset)

    def _should_warn(self) -> bool:
        """Raw warning threshold check, ignoring higher severities."""
        return self._overdue_by(self.warn_after)

    @property
    def should_warn(self) -> bool:
        """True when the warning threshold has passed and no higher severity applies."""
        return self._should_warn() and not (self.should_critical or self.should_error)

    def _should_error(self) -> bool:
        """Raw error threshold check, ignoring the critical severity."""
        return self._overdue_by(self.error_after)

    @property
    def should_error(self) -> bool:
        """True when the error threshold has passed and critical does not apply."""
        return self._should_error() and not self.should_critical

    @property
    def should_critical(self) -> bool:
        """True when the critical threshold is enabled and has passed."""
        return self._overdue_by(self.critical_after)

    @property
    def failed_to_checkin(self) -> bool:
        """True once the current time has reached the check-in deadline."""
        return time() >= self.next_checkin_before
class CheckInRegistry:
    """Mapping of service id to its latest check-in parameters, guarded by a read/write lock.

    Writes go through ``self._lock.write()`` and every query through
    ``self._lock.read()``.
    """

    def __init__(self) -> None:
        self._registry: Dict[str, CheckInAlertParams] = {}
        self._lock = ReadWriteLock()

    def check_in(self, service_id: str, entry: 'CheckInAlertParams') -> None:
        """Store ``entry`` for ``service_id``, replacing any previous entry."""
        with self._lock.write():
            self._registry[service_id] = entry

    def _select(self, predicate) -> Iterable[str]:
        """Return the set of service ids whose entry satisfies ``predicate``.

        ``predicate`` is a one-argument callable applied to each stored entry
        while the read lock is held.
        """
        with self._lock.read():
            return {service_id
                    for service_id, entry in self._registry.items()
                    if predicate(entry)}

    def failed_to_checkin(self) -> Iterable[str]:
        """Service ids whose entry reports ``failed_to_checkin``."""
        return self._select(lambda entry: entry.failed_to_checkin)

    def to_warn(self) -> Iterable[str]:
        """Service ids whose entry reports ``should_warn``."""
        return self._select(lambda entry: entry.should_warn)

    def to_error(self) -> Iterable[str]:
        """Service ids whose entry reports ``should_error``."""
        return self._select(lambda entry: entry.should_error)

    def to_critical(self) -> Iterable[str]:
        """Service ids whose entry reports ``should_critical``."""
        return self._select(lambda entry: entry.should_critical)

    def __str__(self) -> str:
        """Render as ``<object id>(<current thread ident>):<registry dict>``."""
        with self._lock.read():
            return f"{id(self)}({get_ident()}):{self._registry}"
class CheckInService(PrincepsServicer):
    """Servicer that records every incoming check-in in the registry it was given."""

    def __init__(self, registry: CheckInRegistry) -> None:
        self._registry = registry

    def CheckIn(self, request: CheckInRequest, context) -> CheckInRequest:
        """Store the request's alert parameters under its service id and echo the request back."""
        service_id = request.service_id
        params = CheckInAlertParams.of(request)
        self._registry.check_in(service_id, params)
        return request
@retry(wait=wait_random(min=0.5, max=3),
       reraise=True, stop=stop_after_attempt(5))
def princeps_checkin(service_id: str,
                     next_checkin_before: int,
                     alert_ttl: int,
                     warn_after: int = -1,
                     error_after: int = -1,
                     critical_after: int = -1) -> bool:
    """Send a check-in for ``service_id`` to the server listening on ``UNIX_SOCKET``.

    The call is attempted up to five times with a random 0.5-3 second wait
    between attempts; the last exception is re-raised if all attempts fail.

    :param service_id: identifier the check-in is stored under.
    :param next_checkin_before: deadline for the next check-in.
    :param alert_ttl: alert time-to-live forwarded to the server.
    :param warn_after: offset past the deadline for a warning; negative disables it.
    :param error_after: offset past the deadline for an error; negative disables it.
    :param critical_after: offset past the deadline for a critical alert; negative disables it.
    :return: True when the echoed reply carries the same service id and deadline.
    """
    # Forward every alert setting. Previously only the service id and the
    # deadline were sent, so the server saw the protobuf default (0) for the
    # thresholds instead of the caller's values (or the -1 "disabled" defaults).
    request = CheckInRequest(service_id=service_id,
                             next_checkin_before=next_checkin_before,
                             alert_ttl=alert_ttl,
                             warn_after=warn_after,
                             error_after=error_after,
                             critical_after=critical_after)
    with grpc.insecure_channel(UNIX_SOCKET) as channel:
        stub = PrincepsStub(channel)
        result = stub.CheckIn(request)
        return result.service_id == service_id and result.next_checkin_before == next_checkin_before
def _grpc_serve(halt: Event, address: Optional[str] = None):
    """Run the check-in gRPC server until ``halt`` is set.

    :param halt: event that signals the server to shut down.
    :param address: address to bind; falls back to ``UNIX_SOCKET`` when not given.
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    registry = CheckInRegistry()
    add_PrincepsServicer_to_server(
        CheckInService(registry), server
    )
    server.add_insecure_port(address or UNIX_SOCKET)
    server.start()
    try:
        halt.wait()
    finally:
        # stop() is non-blocking and returns an event that is set once the
        # server has fully stopped; wait on it so the grace period is honoured
        # before this function (and the process running it) returns.
        server.stop(grace=1.0).wait()
@contextmanager
def grpc_serve() -> ContextManager[Event]:
    """Run ``_grpc_serve`` in a child process for the duration of the ``with`` block.

    Yields the halt event; setting it asks the server to stop. On exit the
    event is always set and the child is joined for up to two seconds. A child
    that is still alive after that is terminated and reaped.
    """
    halt = Event()
    server_proc = Process(target=_grpc_serve, args=(halt,))
    # Start outside the try block: if start() itself fails there is nothing to
    # join, and joining an unstarted process would mask the original error.
    server_proc.start()
    try:
        yield halt
    finally:
        halt.set()
        server_proc.join(timeout=2.0)
        if server_proc.is_alive():
            # The server did not shut down in time; do not leak the process.
            server_proc.terminate()
            server_proc.join()
if __name__ == '__main__':
    # Manual smoke run: start the server, send one check-in, then signal shutdown.
    with grpc_serve() as halt_flag:
        princeps_checkin(service_id="test", next_checkin_before=1, alert_ttl=30)
        halt_flag.set()