Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/trackables.py: 79%

197 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2025-03-17 21:54 -0600

1"""Functions for creating & updating the progress/status update doc in Elasticsearch""" 

2 

3import typing as t 

4import logging 

5from es_pii_tool.exceptions import FatalError, MissingArgument, MissingDocument 

6from es_pii_tool.helpers.elastic_api import get_progress_doc, update_doc 

7from es_pii_tool.helpers.utils import now_iso8601 

8 

9if t.TYPE_CHECKING: 

10 from es_pii_tool.job import Job 

11 

12 

13MOD = __name__ 

14 

15# pylint: disable=R0902,W0707 

16 

17 

class Trackable:
    """An individual task or step, tracked in Elasticsearch

    The values named in ``ATTRLIST`` are exposed as properties, loaded from a
    previously recorded tracking doc by :meth:`load_status`, and written back
    by :meth:`record`.
    """

    ATTRLIST = ['start_time', 'completed', 'end_time', 'errors', 'logs']

    def __init__(
        self,
        job: t.Optional['Job'] = None,
        index: str = '',
    ):
        """
        :param job: The job this trackable belongs to. ``self.job`` is only set
            when a job is provided.
        :param index: The index name associated with this trackable
        """
        self.logger = logging.getLogger(f'{MOD}.{self.__class__.__name__}')
        self.stub = ''
        if job:
            self.job = job
            self.stub = f'Job: {job.name}'
        self.index = index
        self.task_id = ''
        self.stepname = ''
        self.doc_id = None
        # Give every tracked value a defined starting state so the properties
        # (and add_log) can be used before load_status() has run.
        self.status = {}
        for key in self.ATTRLIST:
            setattr(self, key, None)

    @property
    def status(self) -> t.Dict:
        """
        The status of the current trackable, or retrieved from a previous
        incomplete trackable
        """
        return self._status

    @status.setter
    def status(self, value: t.Dict) -> None:
        self._status = value

    @property
    def start_time(self) -> str:
        """The ISO8601 string representing the start time of this trackable"""
        return self._start_time

    @start_time.setter
    def start_time(self, value: str) -> None:
        self._start_time = value

    @property
    def end_time(self) -> str:
        """The ISO8601 string representing the end time of this trackable"""
        return self._end_time

    @end_time.setter
    def end_time(self, value: str) -> None:
        self._end_time = value

    @property
    def completed(self) -> bool:
        """Is the trackable completed? or Did the trackable complete successfully?"""
        return self._completed

    @completed.setter
    def completed(self, value: bool) -> None:
        self._completed = value

    @property
    def errors(self) -> bool:
        """Were errors encountered during this trackable?"""
        return self._errors

    @errors.setter
    def errors(self, value: bool) -> None:
        self._errors = value

    @property
    def logs(self) -> t.Sequence[str]:
        """The list of log lines collected during this trackable"""
        return self._logs

    @logs.setter
    def logs(self, value: t.Sequence[str]) -> None:
        self._logs = value

    def add_log(self, value: str) -> None:
        """Append another timestamped entry to self.logs

        :param value: The log text. It is stored prefixed with the current
            ``now_iso8601()`` value.
        """
        # Start a fresh list when nothing has been logged yet (None or empty);
        # otherwise extend the existing list in place.
        entries = self.logs if self.logs else []
        entries.append(f'{now_iso8601()} {value}')  # type: ignore
        self.logs = entries

    def load_status(self) -> None:
        """Load prior status values (or not)

        Every attribute in ``ATTRLIST`` is set from ``self.status``, or to None
        when the key is absent. If the previous run was a dry run, every
        attribute is set to None regardless of ``self.status``.
        """
        ignore_prior = self.job.prev_dry_run
        for key in self.ATTRLIST:
            setattr(self, key, None if ignore_prior else self.status.get(key))

    def _fatal(self, exc: Exception) -> 'FatalError':
        """Log ``exc`` at CRITICAL level and build the FatalError wrapping it

        :param exc: The exception that was caught
        :returns: A FatalError carrying the logged message and ``exc``
        """
        # exc.args can be empty (e.g. ``raise ValueError()``); fall back to the
        # repr so the original failure is not masked by an IndexError here.
        detail = exc.args[0] if exc.args else repr(exc)
        msg = f'Fatal error encountered: {detail}'
        self.logger.critical(msg)
        return FatalError(msg, exc)

    def get_trackable(self) -> t.Dict:
        """
        Get any history that may exist for self.stepname of self.task_id of
        self.job.name

        As a side effect, ``self.doc_id`` is set to the ``_id`` of the doc
        when one is found.

        :returns: The ``_source`` of the progress/status update doc, or an
            empty dictionary if no such doc exists yet
        :raises FatalError: on any error other than MissingDocument
        """
        try:
            found = get_progress_doc(
                self.job.client,
                self.job.index,
                self.job.name,
                self.task_id,
                stepname=self.stepname,
            )
        except MissingDocument:
            self.logger.debug('Doc tracking %s does not exist yet', self.stub)
            return {}
        except Exception as exc:
            raise self._fatal(exc) from exc
        self.doc_id = found['_id']
        return found['_source']

    def get_history(self) -> None:
        """
        Get the history of self.stepname, if any. Ensure all values are populated
        from the doc, or None
        """
        self.logger.debug('Pulling any history for %s', self.stub)
        self.status = self.get_trackable()
        if not self.status:
            self.logger.debug('No history for %s', self.stub)
        self.load_status()

    def report_history(self) -> None:
        """
        Log what is known about any prior attempt to run this trackable:
        its start time, completion/end time, and any errors and their logs.
        """
        prefix = f'The prior run of {self.stub}'
        if self.start_time:
            self.logger.info('%s started at %s', prefix, self.start_time)
        if self.completed:
            if self.end_time:
                self.logger.info('%s completed at %s', prefix, self.end_time)
            else:
                msg = 'is marked completed but did not record an end time'
                self.logger.warning(
                    '%s started at %s and %s', prefix, self.start_time, msg
                )
        if self.errors:
            self.logger.warning('%s encountered errors.', prefix)
            if self.logs:
                # Logs are only reported when errors were recorded
                self.logger.warning('%s had log(s): %s', prefix, self.logs)

    def begin(self) -> None:
        """Begin the step and record the current status"""
        self.logger.info('Beginning %s', self.stub)
        if self.job.dry_run:
            msg = 'DRY-RUN: No changes will be made'
            self.logger.info(msg)
            self.add_log(msg)
        self.start_time = now_iso8601()
        self.completed = False
        self.record()
        if not self.doc_id:
            # No doc ID is known yet. get_trackable() is called for its side
            # effect of storing the ID in self.doc_id (its return value is not
            # used), then the attributes are re-read from self.status.
            # NOTE(review): presumably update_doc creates the doc when doc_id
            # is None -- confirm against helpers.elastic_api.
            self.get_trackable()
            self.load_status()
        self.logger.debug('self.doc_id = %s', self.doc_id)

    def end(
        self,
        completed: bool = False,
        errors: bool = False,
        logmsg: t.Union[str, None] = None,
    ) -> None:
        """End the step and record the current status

        :param completed: Did the step complete successfully?
        :param errors: Were errors encountered doing the step?
        :param logmsg: A log line to add for the step, if any
        """
        self.end_time = now_iso8601()
        self.completed = completed
        self.errors = errors
        if logmsg:
            self.add_log(logmsg)
        self.record()
        self.logger.info('%s ended. Completed: %s', self.stub, completed)

    def update_status(self) -> None:
        """Rebuild self.status from the current attribute values

        Only attributes whose value is not None are included.
        """
        contents = {}
        for key in self.ATTRLIST:
            current = getattr(self, key)
            if current is not None:
                contents[key] = current
        self.status = contents

    def build_doc(self) -> t.Dict:
        """Build the dictionary which will be written to the tracking doc

        :returns: The tracking doc dictionary
        """
        self.update_status()
        doc = {key: self.status[key] for key in self.ATTRLIST if key in self.status}
        # index, step and task are only added when they are not empty/None
        if self.index:
            doc['index'] = self.index
        if self.stepname:
            doc['step'] = self.stepname
        if self.task_id:
            doc['task'] = self.task_id  # Necessary for the parent-child relationship
        doc['job'] = self.job.name
        doc['dry_run'] = self.job.dry_run
        return doc

    def record(self) -> None:
        """Record the current status of the task

        :raises FatalError: if the update fails for any reason
        """
        doc = self.build_doc()
        try:
            update_doc(
                self.job.client, self.job.index, self.doc_id, doc  # type: ignore
            )
        except Exception as exc:
            raise self._fatal(exc) from exc

    def finished(self) -> bool:
        """
        Check if a prior run was recorded for this step and log accordingly

        :returns: True only when a prior run completed and the current run is
            not a dry run. False otherwise, including when a completed prior
            run is being ignored because this is a dry run.
        """
        if self.completed:
            if self.job.dry_run:
                self.logger.info('DRY-RUN: Ignoring previous run of %s', self.stub)
            else:
                self.logger.info('%s was completed previously.', self.stub)
                return True
        if self.start_time:
            self.report_history()
            # A completed run that is merely being ignored for a dry run must
            # not be reported as incomplete.
            if not self.completed:
                self.logger.warning(
                    '%s was not completed in a previous run.', self.stub
                )
        return False

273 

274 

class Task(Trackable):
    """A single task belonging to a job, tracked in Elasticsearch"""

    def __init__(
        self,
        job: t.Optional['Job'] = None,
        index: str = '',
        id_suffix: str = '',
        task_id: str = '',
    ):
        """
        Identify the task and load any recorded history for it.

        :param job: The parent job (required)
        :param index: The index name. Combined with ``id_suffix`` to build the
            task ID when ``task_id`` is not given.
        :param id_suffix: Suffix joined to ``index`` with ``---`` to build the
            task ID when ``task_id`` is not given.
        :param task_id: An explicit task ID. Takes precedence over ``index``
            and ``id_suffix``.
        :raises MissingArgument: if ``job`` is None, or if ``task_id`` is empty
            and ``index`` and ``id_suffix`` are not both provided
        """
        super().__init__(job=job, index=index)
        self.logger = logging.getLogger(f'{MOD}.{self.__class__.__name__}')
        if job is None:
            raise MissingArgument('job', 'keyword argument', 'job')
        if not task_id:
            # Without an explicit ID, both pieces of the derived ID are needed
            if not (index and id_suffix):
                absent = ['task_id']
                absent.extend(
                    name
                    for name, supplied in (('index', index), ('id_suffix', id_suffix))
                    if not supplied
                )
                raise MissingArgument(
                    'task_id, or both index and id_suffix must be provided',
                    'keyword argument(s)',
                    absent,
                )
            task_id = f'{index}---{id_suffix}'
        self.task_id = task_id
        self.index = index
        self.doc_id = None
        self.stub = f'Task: {self.task_id} of Job: {self.job.name}'
        self.get_history()

308 

309 

class Step(Trackable):
    """A single step of a task, tracked in Elasticsearch"""

    def __init__(
        self,
        job: t.Optional['Job'] = None,
        task: t.Optional[Task] = None,
        index: str = '',
        stepname: str = '',
    ):
        """
        Identify the step and load any recorded history for it.

        :param job: Passed to the base initializer. ``self.job`` is then
            replaced with ``task.job``.
        :param task: The parent task (required). Supplies the task ID and job.
        :param index: The index name associated with this step
        :param stepname: The name of the step (required)
        :raises MissingArgument: if ``task`` is None or ``stepname`` is empty
        """
        super().__init__(job=job, index=index)
        self.logger = logging.getLogger(f'{MOD}.{self.__class__.__name__}')
        if task is None:
            raise MissingArgument('task', 'keyword argument', 'task')
        if not stepname:
            raise MissingArgument(
                'stepname must be provided', 'keyword argument(s)', 'stepname'
            )
        self.task_id = task.task_id
        # The task's job is authoritative, whatever was passed as ``job``
        self.job = parent_job = task.job
        self.index, self.stepname = index, stepname
        self.doc_id = None
        self.stub = (
            f'Step: {stepname} of Task: {self.task_id} of Job: {parent_job.name}'
        )
        self.get_history()