Coverage for /Users/eugene/Development/legion-utils/legion_utils/princeps.py: 0%

95 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-01 20:15 -0400

1from concurrent import futures 

2from contextlib import contextmanager 

3from dataclasses import dataclass 

4from multiprocessing import Process, Event 

5from threading import get_ident 

6from time import time, sleep 

7from typing import Dict, Iterable, Optional, ContextManager 

8 

9import grpc 

10from tenacity import retry, wait_exponential, stop_after_attempt, wait_random 

11 

12from checkin_pb2 import CheckInRequest 

13from checkin_pb2_grpc import PrincepsServicer, add_PrincepsServicer_to_server, PrincepsStub 

14from legion_utils import log 

15from legion_utils.utils import ReadWriteLock 

16 

# Default gRPC target for this module: the client dials it in princeps_checkin()
# and the server binds it in _grpc_serve() when no explicit address is given.
UNIX_SOCKET: str = "unix:///tmp/princeps.sock"

18 

19 

@dataclass(frozen=True)
class CheckInAlertParams:
    """Immutable alert thresholds recorded for one service check-in.

    ``next_checkin_before`` is compared directly against ``time()``. Each
    ``*_after`` value is an offset added to that deadline; a negative offset
    switches the corresponding severity off.
    """
    next_checkin_before: int
    warn_after: int
    error_after: int
    critical_after: int
    alert_ttl: int  # carried along only; no method of this class reads it

    @staticmethod
    def of(request: 'CheckInRequest') -> 'CheckInAlertParams':
        """Build an instance by copying the same-named attributes off *request*."""
        field_names = ("next_checkin_before", "warn_after", "error_after",
                       "critical_after", "alert_ttl")
        return CheckInAlertParams(**{name: getattr(request, name) for name in field_names})

    def _deadline_passed(self, offset: int) -> bool:
        """Return True when *offset* is enabled (>= 0) and now is at/after deadline + offset."""
        return offset >= 0 and time() >= (self.next_checkin_before + offset)

    def _should_warn(self) -> bool:
        """Raw warn check, ignoring whether a higher severity also applies."""
        return self._deadline_passed(self.warn_after)

    @property
    def should_warn(self) -> bool:
        """True only when warn applies and neither critical nor error does."""
        if not self._should_warn():
            return False
        return not self.should_critical and not self.should_error

    def _should_error(self) -> bool:
        """Raw error check, ignoring whether critical also applies."""
        return self._deadline_passed(self.error_after)

    @property
    def should_error(self) -> bool:
        """True only when error applies and critical does not."""
        if not self._should_error():
            return False
        return not self.should_critical

    @property
    def should_critical(self) -> bool:
        """True when the critical threshold is enabled and has been reached."""
        return self._deadline_passed(self.critical_after)

    @property
    def failed_to_checkin(self) -> bool:
        """True once the current time has reached ``next_checkin_before``."""
        return self.next_checkin_before <= time()

57 

58 

class CheckInRegistry:
    """Mapping from service id to the parameters of that service's latest check-in.

    Every access to the mapping happens inside the ``read()`` or ``write()``
    context of the registry's ``ReadWriteLock``.
    """

    def __init__(self) -> None:
        self._lock = ReadWriteLock()
        self._registry: Dict[str, CheckInAlertParams] = {}

    def _services_where(self, flag: str) -> Iterable[str]:
        """Return the ids whose entry has a truthy attribute named *flag*."""
        with self._lock.read():
            return {sid for sid, params in self._registry.items() if getattr(params, flag)}

    def check_in(self, service_id: str, entry: 'CheckInAlertParams') -> None:
        """Store *entry* for *service_id*, replacing any earlier entry."""
        with self._lock.write():
            self._registry[service_id] = entry

    def failed_to_checkin(self) -> Iterable[str]:
        """Ids of services whose entry reports ``failed_to_checkin``."""
        return self._services_where("failed_to_checkin")

    def to_warn(self) -> Iterable[str]:
        """Ids of services whose entry reports ``should_warn``."""
        return self._services_where("should_warn")

    def to_error(self) -> Iterable[str]:
        """Ids of services whose entry reports ``should_error``."""
        return self._services_where("should_error")

    def to_critical(self) -> Iterable[str]:
        """Ids of services whose entry reports ``should_critical``."""
        return self._services_where("should_critical")

    def __str__(self) -> str:
        """Render as ``<object id>(<thread ident>):<registry dict>``."""
        with self._lock.read():
            snapshot = str(self._registry)
            return f"{id(self)}({get_ident()}):{snapshot}"

87 

88 

class CheckInService(PrincepsServicer):
    """Servicer whose ``CheckIn`` handler records each request in a registry."""

    def __init__(self, registry: CheckInRegistry) -> None:
        # The registry is supplied by the caller rather than created here.
        self._registry = registry

    def CheckIn(self, request: CheckInRequest, context) -> CheckInRequest:
        """Register the request's alert parameters under its service id.

        The request object itself is returned as the response; ``context`` is
        accepted but not used.
        """
        service_id = request.service_id
        params = CheckInAlertParams.of(request)
        self._registry.check_in(service_id, params)
        return request

96 

97 

@retry(wait=wait_random(min=0.5, max=3),
       reraise=True, stop=stop_after_attempt(5))
def princeps_checkin(service_id: str,
                     next_checkin_before: int,
                     alert_ttl: int,
                     warn_after: int = -1,
                     error_after: int = -1,
                     critical_after: int = -1) -> bool:
    """Send one check-in for *service_id* to the server listening on ``UNIX_SOCKET``.

    The call is retried (at most 5 attempts, with a random 0.5-3 wait between
    them); the last exception is re-raised if every attempt fails.

    :param service_id: identifier the check-in is registered under.
    :param next_checkin_before: deadline for the next check-in; the server
        compares it against ``time()``.
    :param alert_ttl: forwarded unchanged in the request.
    :param warn_after: offset past the deadline for a warning; negative disables it.
    :param error_after: offset past the deadline for an error; negative disables it.
    :param critical_after: offset past the deadline for a critical; negative disables it.
    :return: True when the echoed response carries the same ``service_id`` and
        ``next_checkin_before`` that were sent.
    """
    # Every threshold must travel in the request: the server builds its alert
    # parameters from these fields, so omitting them makes it see the message
    # defaults instead of the caller's values (including the -1 "disabled" ones).
    request = CheckInRequest(service_id=service_id,
                             next_checkin_before=next_checkin_before,
                             warn_after=warn_after,
                             error_after=error_after,
                             critical_after=critical_after,
                             alert_ttl=alert_ttl)
    with grpc.insecure_channel(UNIX_SOCKET) as channel:
        stub = PrincepsStub(channel)
        result = stub.CheckIn(request)
        return result.service_id == service_id and result.next_checkin_before == next_checkin_before

110 

111 

def _grpc_serve(halt: Event, address: Optional[str] = None):
    """Run the check-in gRPC server until *halt* is set, then shut it down.

    :param halt: event that is waited on; setting it ends the serving loop.
    :param address: address to bind; falls back to ``UNIX_SOCKET`` when falsy.
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    registry = CheckInRegistry()
    add_PrincepsServicer_to_server(
        CheckInService(registry), server
    )
    server.add_insecure_port(address or UNIX_SOCKET)
    server.start()
    try:
        halt.wait()
    finally:
        # stop() only initiates shutdown and hands back an event; block on it so
        # the grace period is actually honoured before this function returns,
        # and do so even if the wait above was interrupted.
        server.stop(grace=1.0).wait()

122 

123 

@contextmanager
def grpc_serve() -> ContextManager[Event]:
    """Run ``_grpc_serve`` in a child process for the duration of the ``with`` body.

    Yields the halt event; it is set on exit (the caller may also set it
    earlier), after which the child is joined for up to 2 seconds.
    """
    halt = Event()
    server_proc = Process(target=_grpc_serve, args=(halt,))
    # Start before entering the try: if start() fails there is nothing to clean
    # up, and join() on a never-started process would raise and hide that error.
    server_proc.start()
    try:
        yield halt
    finally:
        halt.set()
        server_proc.join(timeout=2.0)
        if server_proc.is_alive():
            # The child ignored the halt request within the timeout; do not
            # leave it running after the context has exited.
            server_proc.terminate()
            server_proc.join()

134 

135 

if __name__ == "__main__":
    # Manual smoke run: start the server process, send a single check-in,
    # then request shutdown explicitly before the context exits.
    with grpc_serve() as halt_flag:
        princeps_checkin(service_id="test", next_checkin_before=1, alert_ttl=30)
        halt_flag.set()