Coverage for /Users/eugene/Development/legion-utils/legion_utils/instrumentation.py: 66%

121 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-01 20:15 -0400

1from abc import abstractmethod, ABC 

2from functools import wraps 

3from itertools import count 

4from random import randint 

5from threading import Event 

6from time import sleep 

7from traceback import format_exc 

8from typing import Callable, Union, Optional, Dict, Any, List 

9 

10from robotnikmq import RobotnikConfig 

11 

12from legion_utils import WarningMsg, ErrorMsg, CriticalMsg, broadcast_alert_msg 

13from legion_utils.core import HOSTNAME 

14 

15 

class Alerter(ABC):
    """Base class for objects that broadcast alerts about a single task.

    An instance remembers the task identifier, the ``exchange`` and ``route``
    handed to ``broadcast_alert_msg``, an optional ``RobotnikConfig``, and the
    TTL used whenever a caller does not supply one. Subclasses must implement
    :meth:`key`.
    """

    def __init__(self,
                 task_id: str,
                 exchange: str,
                 route: str,
                 default_ttl: int,
                 config: Optional[RobotnikConfig] = None):
        """Store the alert routing details.

        Args:
            task_id: Identifier of the task the alerts are about.
            exchange: Forwarded as ``exchange=`` to ``broadcast_alert_msg``.
            route: Forwarded as ``route=`` to ``broadcast_alert_msg``.
            default_ttl: TTL used when a broadcast call passes a falsy ``ttl``.
            config: Forwarded as ``config=`` to ``broadcast_alert_msg``.
        """
        self.task_id = task_id
        self.exchange = exchange
        self.route = route
        self.config = config
        self.default_ttl = default_ttl

    @abstractmethod
    def key(self, task_id: str) -> List[str]:
        """Return the alert key (a list of strings) for ``task_id``."""
        pass  # pragma: no cover

    def _broadcast(self,
                   msg_type: Callable[..., Any],
                   contents: Dict[str, Any],
                   desc: str,
                   ttl: Optional[int]) -> None:
        """Build a ``msg_type`` alert for this task and broadcast it.

        Shared implementation behind the three public ``broadcast_*`` methods,
        which differ only in the message class they use.
        """
        # A falsy ttl (None, but also 0) falls back to the instance default.
        alert = msg_type(contents=contents,
                         key=self.key(self.task_id),
                         desc=desc,
                         ttl=(ttl or self.default_ttl))
        broadcast_alert_msg(exchange=self.exchange,
                            route=self.route,
                            config=self.config,
                            alert=alert)

    def broadcast_warning(self, contents: Dict[str, Any],
                          desc: str,
                          ttl: Optional[int] = None) -> None:
        """Broadcast a ``WarningMsg`` carrying ``contents`` and ``desc``."""
        self._broadcast(WarningMsg, contents, desc, ttl)

    def broadcast_error(self, contents: Dict[str, Any],
                        desc: str,
                        ttl: Optional[int] = None) -> None:
        """Broadcast an ``ErrorMsg`` carrying ``contents`` and ``desc``."""
        self._broadcast(ErrorMsg, contents, desc, ttl)

    def broadcast_critical(self, contents: Dict[str, Any],
                           desc: str,
                           ttl: Optional[int] = None) -> None:
        """Broadcast a ``CriticalMsg`` carrying ``contents`` and ``desc``."""
        self._broadcast(CriticalMsg, contents, desc, ttl)

65 

66 

class Runner(Alerter):
    """An :class:`Alerter` that wraps a callable and carries a halt flag.

    Concrete runners are used by calling the instance with a zero-argument
    function; the call returns the wrapped function.
    """

    def __init__(self, task_id: str, exchange: str, route: str, default_ttl: int,
                 halt_flag: Optional[Event] = None, config: Optional[RobotnikConfig] = None):
        """Initialise the alerter and attach a halt flag.

        Args:
            task_id: Passed through to ``Alerter.__init__``.
            exchange: Passed through to ``Alerter.__init__``.
            route: Passed through to ``Alerter.__init__``.
            default_ttl: Passed through to ``Alerter.__init__``.
            halt_flag: Event stored as ``self.halt_flag``; a new ``Event`` is
                created when none is given.
            config: Passed through to ``Alerter.__init__``.
        """
        super().__init__(task_id=task_id,
                         exchange=exchange,
                         route=route,
                         default_ttl=default_ttl,
                         config=config)
        self.halt_flag = halt_flag or Event()

    @abstractmethod
    def __call__(self, func: Callable[[], None]) -> Callable[[], None]:
        """Wrap ``func`` and return the wrapper.

        The return annotation matches the concrete overrides in this module,
        which all return a zero-argument callable rather than ``None``.
        """
        pass  # pragma: no cover

76 

77 

class Service(Runner):
    """Decorator that keeps relaunching a callable and alerts when it stops.

    Calling an instance with a zero-argument function returns a wrapper. The
    wrapper sleeps for the start delay, then calls the function over and over
    until ``halt_flag`` is set. After every call - whether it returned or
    raised an ``Exception`` - an alert may be broadcast (depending on the
    attempt thresholds) and the wrapper sleeps for the relaunch delay.
    """

    def __init__(self,
                 task_id: str,
                 exchange: str,
                 route: str,
                 ttl: Optional[int] = None,
                 start_delay: Union[int, Callable[[], int], None] = None,
                 relaunch_delay: Optional[int] = None,
                 jitter: Optional[int] = None,
                 warn_after_attempts: Union[int, float, None] = None,
                 error_after_attempts: Union[int, float, None] = None,
                 critical_after_attempts: Union[int, float, None] = None,
                 halt_flag: Optional[Event] = None,
                 config: Optional[RobotnikConfig] = None):
        """Configure delays and alert thresholds.

        Args:
            task_id: Passed to ``Runner.__init__``.
            exchange: Passed to ``Runner.__init__``.
            route: Passed to ``Runner.__init__``.
            ttl: Default alert TTL; a falsy value becomes 120.
            start_delay: Number, or callable returning one, slept (in seconds)
                before the first launch. Defaults to a random integer between
                0 and ``relaunch_delay``, drawn each time it is needed.
            relaunch_delay: Base sleep (seconds) between launches; default 30.
            jitter: Bound of the random offset added to ``relaunch_delay``;
                default 3.
            warn_after_attempts: Attempt count from which warnings are sent;
                falsy means never.
            error_after_attempts: Attempt count from which errors are sent;
                when falsy it is 1 if ``warn_after_attempts`` is ``None``,
                otherwise never.
            critical_after_attempts: Attempt count from which criticals are
                sent; falsy means never.
            halt_flag: Passed to ``Runner.__init__``.
            config: Passed to ``Runner.__init__``.
        """
        super().__init__(task_id=task_id, exchange=exchange, route=route,
                         default_ttl=(ttl or 120), halt_flag=halt_flag, config=config)
        never = float('inf')
        self.relaunch_delay = 30 if relaunch_delay is None else relaunch_delay
        if start_delay is None:
            # Evaluated lazily so it reflects relaunch_delay at call time.
            self.start_delay = lambda: randint(0, self.relaunch_delay)
        else:
            self.start_delay = start_delay
        self.jitter = 3 if jitter is None else jitter
        self.warn_after_attempts = warn_after_attempts or never
        if error_after_attempts:
            self.error_after_attempts = error_after_attempts
        elif warn_after_attempts is None:
            # No warning threshold configured: go straight to errors.
            self.error_after_attempts = 1
        else:
            self.error_after_attempts = never
        self.critical_after_attempts = critical_after_attempts or never

    @property
    def _start_delay(self) -> int:
        """Resolve ``start_delay`` to a number, calling it if it is callable."""
        if callable(self.start_delay):
            return self.start_delay()
        return self.start_delay

    def delay_start(self) -> None:
        """Sleep for the absolute value of the resolved start delay."""
        sleep(abs(self._start_delay))

    @property
    def _relaunch_delay(self) -> int:
        """Return ``relaunch_delay`` plus a random offset in ``[-jitter, jitter]``."""
        offset = randint(-self.jitter, self.jitter)
        return self.relaunch_delay + offset

    def delay_relaunch(self) -> None:
        """Sleep for the absolute value of the jittered relaunch delay."""
        sleep(abs(self._relaunch_delay))

    def key(self, task_id: str) -> List[str]:
        """Return the alert key for a stopped service on this host."""
        return [HOSTNAME, 'legion', 'service_failure', task_id]

    def _report_stop(self, attempt: int, last_traceback: Optional[str]) -> None:
        """Broadcast the most severe alert whose threshold ``attempt`` reached."""
        contents = {"task_id": self.task_id,
                    "last_stack_trace": last_traceback,
                    "num_failures": attempt}
        desc = (f"Service '{self.task_id}' stopped running"
                if attempt == 1
                else f"Service '{self.task_id}' stopped running {attempt} times in a row")
        # Ordered from most to least severe; only the first match is sent.
        ladder = ((self.critical_after_attempts, self.broadcast_critical),
                  (self.error_after_attempts, self.broadcast_error),
                  (self.warn_after_attempts, self.broadcast_warning))
        for threshold, broadcast in ladder:
            if attempt >= threshold:
                broadcast(contents=contents, desc=desc)
                break

    def __call__(self, func: Callable[[], None]) -> Callable[[], None]:
        """Return a wrapper that relaunches ``func`` until the halt flag is set."""
        @wraps(func)
        def retry_infinity_wrapper() -> None:
            # Kept across iterations: a clean return after an earlier failure
            # still reports the most recent traceback seen.
            last_traceback: Optional[str] = None
            attempt = 0
            self.delay_start()
            while not self.halt_flag.is_set():
                attempt += 1
                try:
                    func()
                except Exception:
                    last_traceback = format_exc()
                finally:
                    # A service is expected never to return, so every exit
                    # from func() is reported, not only exceptions.
                    self._report_stop(attempt, last_traceback)
                    # NOTE(review): the extracted source had lost its
                    # indentation; confirm the relaunch delay belongs inside
                    # ``finally`` rather than after the try statement.
                    self.delay_relaunch()

        return retry_infinity_wrapper

147 

148 

class Periodic(Runner):
    """Decorator that runs a callable repeatedly with a jittered pause.

    Calling an instance with a zero-argument function returns a wrapper. The
    wrapper sleeps for the start delay, then loops until ``halt_flag`` is set:
    it calls the function, counts consecutive ``Exception`` failures (a
    successful call resets the count), may broadcast an alert on failure
    depending on the thresholds, and sleeps for the jittered delay.
    """

    def __init__(self,
                 task_id: str,
                 exchange: str,
                 route: str,
                 delay: int,
                 ttl: Optional[int] = None,
                 start_delay: Union[int, Callable[[], int], None] = None,
                 jitter: Optional[int] = None,
                 warn_after_failures: Union[int, float, None] = None,
                 error_after_failures: Union[int, float, None] = None,
                 critical_after_failures: Union[int, float, None] = None,
                 halt_flag: Optional[Event] = None,
                 config: Optional[RobotnikConfig] = None):
        """Configure the period and alert thresholds.

        Args:
            task_id: Passed to ``Runner.__init__``.
            exchange: Passed to ``Runner.__init__``.
            route: Passed to ``Runner.__init__``.
            delay: Base sleep (seconds) between runs.
            ttl: Default alert TTL; a falsy value becomes 120.
            start_delay: Number, or callable returning one, slept (in seconds)
                before the first run. Defaults to a random integer between 0
                and ``delay``, drawn each time it is needed.
            jitter: Bound of the random offset added to ``delay``; default 3.
            warn_after_failures: Consecutive-failure count from which warnings
                are sent; falsy means never.
            error_after_failures: Consecutive-failure count from which errors
                are sent; when falsy it is 1 if ``warn_after_failures`` is
                ``None``, otherwise never.
            critical_after_failures: Consecutive-failure count from which
                criticals are sent; falsy means never.
            halt_flag: Passed to ``Runner.__init__``.
            config: Passed to ``Runner.__init__``.
        """
        super().__init__(task_id=task_id,
                         exchange=exchange,
                         route=route,
                         default_ttl=(ttl or 120),
                         halt_flag=halt_flag,
                         config=config)
        never = float('inf')
        self.delay = delay
        if start_delay is None:
            # Evaluated lazily so it reflects delay at call time.
            self.start_delay = lambda: randint(0, self.delay)
        else:
            self.start_delay = start_delay
        self.jitter = 3 if jitter is None else jitter
        self.warn_after_failures = warn_after_failures or never
        if error_after_failures:
            self.error_after_failures = error_after_failures
        elif warn_after_failures is None:
            # No warning threshold configured: go straight to errors.
            self.error_after_failures = 1
        else:
            self.error_after_failures = never
        self.critical_after_failures = critical_after_failures or never

    @property
    def _start_delay(self) -> int:
        """Resolve ``start_delay`` to a number, calling it if it is callable."""
        if callable(self.start_delay):
            return self.start_delay()
        return self.start_delay

    def delay_start(self) -> None:
        """Sleep for the absolute value of the resolved start delay."""
        sleep(abs(self._start_delay))

    @property
    def _jittery_delay(self) -> int:
        """Return ``delay`` plus a random offset in ``[-jitter, jitter]``."""
        offset = randint(-self.jitter, self.jitter)
        return self.delay + offset

    def jittery_delay(self) -> None:
        """Sleep for the absolute value of the jittered delay."""
        sleep(abs(self._jittery_delay))

    def key(self, task_id: str) -> List[str]:
        """Return the alert key for a failed periodic task on this host."""
        return [HOSTNAME, 'legion', 'periodic_task_failure', task_id]

    def _report_failure(self, num_failures: int, stack_trace: str) -> None:
        """Broadcast the most severe alert whose threshold was reached."""
        contents = {"task_id": self.task_id,
                    "last_stack_trace": stack_trace,
                    "num_failures": num_failures}
        desc = f"Periodic task '{self.task_id}' failed {num_failures} times in a row"
        # Ordered from most to least severe; only the first match is sent.
        ladder = ((self.critical_after_failures, self.broadcast_critical),
                  (self.error_after_failures, self.broadcast_error),
                  (self.warn_after_failures, self.broadcast_warning))
        for threshold, broadcast in ladder:
            if num_failures >= threshold:
                broadcast(contents=contents, desc=desc)
                break

    def __call__(self, func: Callable[[], None]) -> Callable[[], None]:
        """Return a wrapper that runs ``func`` periodically until halted."""
        @wraps(func)
        def run_infinity_wrapper() -> None:
            num_failures = 0
            self.delay_start()
            while not self.halt_flag.is_set():
                try:
                    func()
                except Exception:
                    num_failures += 1
                    self._report_failure(num_failures, format_exc())
                else:
                    # A clean run ends the current streak of failures.
                    num_failures = 0
                self.jittery_delay()

        return run_infinity_wrapper