Coverage for /Users/eugene/Development/legion-utils/legion_utils/service.py: 0%

77 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-01 14:05 -0400

1from abc import abstractmethod, ABC 

2from functools import wraps 

3from itertools import count 

4from random import randint 

5from threading import Event 

6from time import sleep 

7from traceback import format_exc 

8from typing import Callable, Union, Optional, Dict, Any, List 

9 

10from robotnikmq import RobotnikConfig 

11 

12from legion_utils import WarningMsg, ErrorMsg, CriticalMsg, broadcast_alert_msg 

13from legion_utils.core import HOSTNAME 

14 

15 

class Alerter(ABC):
    """Base class for objects that broadcast alert messages about one task.

    An instance remembers where alerts go (``exchange`` and ``route``), an
    optional ``config`` that is forwarded to ``broadcast_alert_msg``, and a
    default TTL. Subclasses provide the alert key through :meth:`key`.
    """

    def __init__(self,
                 task_id: str,
                 exchange: str,
                 route: str,
                 default_ttl: int,
                 config: Optional[RobotnikConfig] = None):
        """Store the task identity and the alert destination.

        Args:
            task_id: Identifier passed to :meth:`key` when building alerts.
            exchange: Forwarded as ``exchange`` to ``broadcast_alert_msg``.
            route: Forwarded as ``route`` to ``broadcast_alert_msg``.
            default_ttl: TTL used whenever a ``broadcast_*`` call does not
                supply a truthy ``ttl`` of its own.
            config: Forwarded as ``config`` to ``broadcast_alert_msg``;
                may be ``None``.
        """
        self.task_id = task_id
        self.default_ttl = default_ttl
        self.exchange = exchange
        self.route = route
        self.config = config

    @abstractmethod
    def key(self, task_id: str) -> List[str]:
        """Return the alert key (a list of strings) for ``task_id``."""

    def broadcast_warning(self, contents: Dict[str, Any],
                          desc: str,
                          ttl: Optional[int] = None) -> None:
        """Build a ``WarningMsg`` and hand it to ``broadcast_alert_msg``.

        Args:
            contents: Payload passed through as the message ``contents``.
            desc: Description passed through as the message ``desc``.
            ttl: TTL for this alert; any falsy value (``None`` or ``0``)
                falls back to ``self.default_ttl``.
        """
        effective_ttl = ttl if ttl else self.default_ttl
        alert = WarningMsg(contents=contents,
                           key=self.key(self.task_id),
                           desc=desc,
                           ttl=effective_ttl)
        broadcast_alert_msg(exchange=self.exchange,
                            route=self.route,
                            config=self.config,
                            alert=alert)

    def broadcast_error(self, contents: Dict[str, Any],
                        desc: str,
                        ttl: Optional[int] = None) -> None:
        """Build an ``ErrorMsg`` and hand it to ``broadcast_alert_msg``.

        Args:
            contents: Payload passed through as the message ``contents``.
            desc: Description passed through as the message ``desc``.
            ttl: TTL for this alert; any falsy value (``None`` or ``0``)
                falls back to ``self.default_ttl``.
        """
        effective_ttl = ttl if ttl else self.default_ttl
        alert = ErrorMsg(contents=contents,
                         key=self.key(self.task_id),
                         desc=desc,
                         ttl=effective_ttl)
        broadcast_alert_msg(exchange=self.exchange,
                            route=self.route,
                            config=self.config,
                            alert=alert)

    def broadcast_critical(self, contents: Dict[str, Any],
                           desc: str,
                           ttl: Optional[int] = None) -> None:
        """Build a ``CriticalMsg`` and hand it to ``broadcast_alert_msg``.

        Args:
            contents: Payload passed through as the message ``contents``.
            desc: Description passed through as the message ``desc``.
            ttl: TTL for this alert; any falsy value (``None`` or ``0``)
                falls back to ``self.default_ttl``.
        """
        effective_ttl = ttl if ttl else self.default_ttl
        alert = CriticalMsg(contents=contents,
                            key=self.key(self.task_id),
                            desc=desc,
                            ttl=effective_ttl)
        broadcast_alert_msg(exchange=self.exchange,
                            route=self.route,
                            config=self.config,
                            alert=alert)

65 

66 

class Runner(Alerter):
    """An ``Alerter`` that wraps a zero-argument callable and can be halted.

    Adds a ``halt_flag`` (a ``threading.Event``) to the alerting state and
    declares ``__call__`` as the hook subclasses implement to wrap ``func``.
    """

    def __init__(self, task_id: str, exchange: str, route: str, default_ttl: int,
                 halt_flag: Optional[Event] = None, config: Optional[RobotnikConfig] = None):
        """Initialise the alerting state and the halt flag.

        Args:
            task_id: Forwarded to ``Alerter.__init__``.
            exchange: Forwarded to ``Alerter.__init__``.
            route: Forwarded to ``Alerter.__init__``.
            default_ttl: Forwarded to ``Alerter.__init__``.
            halt_flag: Event stored as ``self.halt_flag``; a fresh
                ``Event`` is created when none is given.
            config: Forwarded to ``Alerter.__init__``.
        """
        super().__init__(task_id=task_id, exchange=exchange, route=route,
                         default_ttl=default_ttl, config=config)
        self.halt_flag = halt_flag or Event()

    @abstractmethod
    def __call__(self, func: Callable[[], None]) -> Callable[[], None]:
        """Wrap ``func`` and return the wrapped callable.

        The return annotation matches ``Service.__call__`` in this module,
        which returns the wrapper rather than ``None``.
        """

76 

77 

class Service(Runner):
    """Decorator object that keeps relaunching a function until halted.

    Calling an instance with a zero-argument function returns a wrapper
    that runs the function in a loop, broadcasting an alert before each
    launch once the number of previous launches reaches the configured
    thresholds.
    """

    def __init__(self,
                 task_id: str,
                 exchange: str,
                 route: str,
                 ttl: Optional[int] = None,
                 start_delay: Union[int, Callable[[], int], None] = None,
                 relaunch_delay: Optional[int] = None,
                 jitter: Optional[int] = None,
                 warn_after_attempts: Union[int, float, None] = None,
                 error_after_attempts: Union[int, float, None] = None,
                 critical_after_attempts: Union[int, float, None] = None,
                 halt_flag: Optional[Event] = None,
                 config: Optional[RobotnikConfig] = None):
        """Configure delays and alert thresholds.

        Args:
            task_id: Forwarded to ``Runner.__init__``.
            exchange: Forwarded to ``Runner.__init__``.
            route: Forwarded to ``Runner.__init__``.
            ttl: Default alert TTL; a falsy value falls back to ``120``.
            start_delay: Delay before the first launch, either a number or
                a callable returning one. Defaults to a callable returning
                ``randint(0, 30)``.
            relaunch_delay: Base delay between launches. Defaults to ``30``.
            jitter: Maximum random offset (either direction) added to
                ``relaunch_delay``. Defaults to ``3``.
            warn_after_attempts: Loop count from which a warning is sent;
                a falsy value (including ``0``) means never.
            error_after_attempts: Loop count from which an error is sent;
                a falsy value (including ``0``) falls back to ``1``.
            critical_after_attempts: Loop count from which a critical alert
                is sent; a falsy value (including ``0``) means never.
            halt_flag: Forwarded to ``Runner.__init__``.
            config: Forwarded to ``Runner.__init__``.
        """
        super().__init__(task_id=task_id, exchange=exchange, route=route,
                         default_ttl=(ttl or 120), halt_flag=halt_flag, config=config)
        self.start_delay = start_delay if start_delay is not None else (lambda: randint(0, 30))
        self.relaunch_delay = relaunch_delay if relaunch_delay is not None else 30
        self.jitter = jitter if jitter is not None else 3
        self.warn_after_attempts = warn_after_attempts or float('inf')
        self.error_after_attempts = error_after_attempts or 1
        self.critical_after_attempts = critical_after_attempts or float('inf')

    @property
    def _start_delay(self) -> int:
        """Resolve ``start_delay``, calling it first when it is callable."""
        return self.start_delay if not callable(self.start_delay) else self.start_delay()

    def delay_start(self) -> None:
        """Sleep for the absolute value of the resolved start delay."""
        sleep(abs(self._start_delay))

    @property
    def _relaunch_delay(self) -> int:
        """Return ``relaunch_delay`` plus a random offset in ``[-jitter, jitter]``."""
        return self.relaunch_delay + randint(-self.jitter, self.jitter)

    def delay_relaunch(self) -> None:
        """Sleep for the absolute value of the jittered relaunch delay."""
        sleep(abs(self._relaunch_delay))

    def key(self, task_id: str) -> List[str]:
        """Return the alert key ``[HOSTNAME, 'legion', 'service_failure', task_id]``."""
        return [HOSTNAME, 'legion', 'service_failure', task_id]

    def __call__(self, func: Callable[[], None]) -> Callable[[], None]:
        """Wrap ``func`` so it is relaunched until ``halt_flag`` is set.

        The wrapper sleeps for the start delay, then loops: it stops when
        ``halt_flag`` is set, otherwise it broadcasts at most one alert
        (critical, else error, else warning, by comparing the loop counter
        with the thresholds), runs ``func``, records the traceback if
        ``func`` raised ``Exception``, and sleeps for the relaunch delay.

        Args:
            func: Zero-argument callable to run repeatedly.

        Returns:
            The wrapping function, carrying ``func``'s metadata via ``wraps``.
        """
        @wraps(func)
        def retry_infinity_wrapper() -> None:
            last_traceback: Optional[str] = None
            self.delay_start()
            for i in count():
                if self.halt_flag.is_set():
                    break
                contents = {"task_id": self.task_id,
                            "last_stack_trace": last_traceback,
                            "num_failures": i}
                desc = f"Service '{self.task_id}' failed and stopped running {i} times in a row"
                # Most severe threshold wins; only one alert per iteration.
                if i >= self.critical_after_attempts:
                    self.broadcast_critical(contents=contents, desc=desc)
                elif i >= self.error_after_attempts:
                    self.broadcast_error(contents=contents, desc=desc)
                elif i >= self.warn_after_attempts:
                    self.broadcast_warning(contents=contents, desc=desc)
                try:
                    func()
                except Exception:
                    # Must update the local that feeds "last_stack_trace";
                    # writing to an instance attribute left the alert
                    # payload's traceback permanently None.
                    last_traceback = format_exc()
                # Wait before every relaunch, whether func raised or simply
                # returned, so a func that exits cleanly cannot spin the
                # loop and flood alerts.
                self.delay_relaunch()

        return retry_infinity_wrapper