Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/task.py: 78%

166 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2025-01-29 19:21 -0700

1"""Functions for creating & updating the progress/status update doc in Elasticsearch""" 

2 

3import typing as t 

4import logging 

5from es_pii_tool.exceptions import FatalError, MissingArgument, MissingDocument 

6from es_pii_tool.helpers.elastic_api import get_task_doc, update_doc 

7from es_pii_tool.helpers.utils import now_iso8601 

8 

9if t.TYPE_CHECKING: 

10 from es_pii_tool.job import Job 

11 

12MOD = __name__ 

13 

14# pylint: disable=R0902,W0707 

15 

16 

class Task:
    """An individual task item, tracked in Elasticsearch

    A task belongs to a parent job and is identified by ``task_id``, which is
    either supplied directly or built as ``{index}---{id_suffix}``. On
    construction any prior tracking document for the task is fetched and its
    values are loaded onto the instance.
    """

    # Names of the instance attributes that are restored from a prior tracking
    # doc by load_status() and written back by update_status()/build_doc().
    ATTRLIST = ['start_time', 'completed', 'end_time', 'errors', 'logs']

    def __init__(
        self,
        job: 'Job',
        index: t.Union[str, None] = None,
        id_suffix: t.Union[str, None] = None,
        task_id: t.Union[str, None] = None,
    ):
        """
        :param job: The parent job. Its ``client``, ``index``, ``name``,
            ``dry_run`` and ``prev_dry_run`` attributes are read by this class.
        :param index: The index this task applies to (optional when ``task_id``
            is provided)
        :param id_suffix: Suffix joined to ``index`` to build the task id
        :param task_id: A complete task id. Takes precedence over ``index`` and
            ``id_suffix`` when truthy.

        :raises MissingArgument: if ``task_id`` is not provided and either
            ``index`` or ``id_suffix`` is missing or empty
        :raises FatalError: if looking up the task history fails unexpectedly
        """
        self.logger = logging.getLogger(f'{MOD}.{self.__class__.__name__}')
        self.job = job
        if task_id:
            self.task_id = task_id
        elif not id_suffix or not index:
            # Report every argument that is unusable. The same falsy test as
            # the branch condition is used so an empty string is listed too.
            missing = ['task_id']
            if not id_suffix:
                missing.append('id_suffix')
            if not index:
                missing.append('index')
            raise MissingArgument(
                'task_id, or both index and id_suffix must be provided',
                'keyword argument(s)',
                missing,
            )
        else:
            self.task_id = f'{index}---{id_suffix}'
        self.index = index
        self.doc_id = None
        self.get_history()

    @property
    def status(self) -> t.Dict:
        """
        The status of the current task, or retrieved from a previous incomplete
        task
        """
        return self._status

    @status.setter
    def status(self, value: t.Dict) -> None:
        self._status = value

    @property
    def start_time(self) -> str:
        """The ISO8601 string representing the start time of this task"""
        return self._start_time

    @start_time.setter
    def start_time(self, value: str) -> None:
        self._start_time = value

    @property
    def end_time(self) -> str:
        """The ISO8601 string representing the end time of this task"""
        return self._end_time

    @end_time.setter
    def end_time(self, value: str) -> None:
        self._end_time = value

    @property
    def completed(self) -> bool:
        """Is the task completed? or Did the task complete successfully?"""
        return self._completed

    @completed.setter
    def completed(self, value: bool) -> None:
        self._completed = value

    @property
    def errors(self) -> bool:
        """Were errors encountered during this task?"""
        return self._errors

    @errors.setter
    def errors(self, value: bool) -> None:
        self._errors = value

    @property
    def logs(self) -> t.Sequence[str]:
        """The list of log lines collected during this task"""
        return self._logs

    @logs.setter
    def logs(self, value: t.Sequence[str]) -> None:
        self._logs = value

    def add_log(self, value: str) -> None:
        """Append another timestamped entry to self.logs

        :param value: The log message. It is stored prefixed with the current
            ISO8601 timestamp and a space.
        """
        # Copy into a fresh list so a non-list sequence (or None) is handled
        # and the previously stored sequence is not mutated in place.
        entries = list(self.logs) if self.logs else []
        entries.append(f'{now_iso8601()} {value}')
        self.logs = entries

    def load_status(self) -> None:
        """Load prior status values (or not)

        Every attribute named in ATTRLIST is set from self.status, or to None
        when the key is absent. If the job's previous run was a dry run, all of
        them are set to None regardless of what self.status holds.
        """
        # A prior dry run made no changes, so none of its values are carried over
        prior = {} if self.job.prev_dry_run else self.status
        for key in self.ATTRLIST:
            setattr(self, key, prior.get(key))

    def _log_fatal(self, exc: Exception) -> str:
        """Build and log (at CRITICAL) the message for an unexpected failure

        :param exc: The exception that was caught

        :returns: The message that was logged
        """
        # exc.args is empty for exceptions raised without arguments; indexing
        # it blindly would raise IndexError and hide the original error.
        detail = exc.args[0] if exc.args else repr(exc)
        msg = f'Fatal error encountered: {detail}'
        self.logger.critical(msg)
        return msg

    def get_task(self) -> t.Dict:
        """Get any task history that may exist for self.job.name and self.task_id

        On success self.doc_id is set to the ``_id`` of the tracking doc.

        :returns: The ``_source`` of the progress/status update doc, or an
            empty dict if no such doc exists yet

        :raises FatalError: on any error other than the doc being missing
        """
        try:
            retval = get_task_doc(
                self.job.client, self.job.index, self.job.name, self.task_id
            )
        except MissingDocument:
            self.logger.debug(
                'Doc tracking job: %s, task: %s does not exist yet',
                self.job.name,
                self.task_id,
            )
            return {}
        except Exception as exc:
            raise FatalError(self._log_fatal(exc), exc) from exc
        self.doc_id = retval['_id']
        return retval['_source']

    def get_history(self) -> None:
        """
        Get the history of a taskid, if any. Ensure all values are populated from the
        doc, or None
        """
        self.logger.debug('Pulling any history for task: %s', self.task_id)
        self.status = self.get_task()
        if not self.status:
            self.logger.debug(
                'No history for job: %s, task: %s', self.job.name, self.task_id
            )
        self.load_status()

    def report_history(self) -> None:
        """
        Log what is known about any prior attempt to run self.task_id of
        self.job.name: its start time, completion state, and errors.
        """
        prefix = f'The prior run of job: {self.job.name}, task: {self.task_id}'
        if self.start_time:
            self.logger.info('%s started at %s', prefix, self.start_time)
        if self.completed:
            if self.end_time:
                self.logger.info('%s completed at %s', prefix, self.end_time)
            else:
                msg = 'is marked completed but did not record an end time'
                self.logger.warning(
                    '%s started at %s and %s', prefix, self.start_time, msg
                )
        if self.errors:
            self.logger.warning('%s encountered errors.', prefix)
            if self.logs:
                # Only report the logs when errors were recorded
                self.logger.warning('%s had log(s): %s', prefix, self.logs)

    def begin(self) -> None:
        """Begin the task and record the current status"""
        self.logger.info('Beginning job: %s, task: %s', self.job.name, self.task_id)
        if self.job.dry_run:
            msg = 'DRY-RUN: No changes will be made'
            self.logger.info(msg)
            self.add_log(msg)
        self.start_time = now_iso8601()
        self.completed = False
        self.record()
        if not self.doc_id:
            # Called for its side effect: a successful lookup sets self.doc_id.
            self.get_task()
            # NOTE(review): this reloads from self.status, which record() has
            # just rebuilt. When job.prev_dry_run is truthy it resets every
            # ATTRLIST attribute to None -- confirm that is intended here.
            self.load_status()
        self.logger.debug('self.doc_id = %s', self.doc_id)

    def end(
        self,
        completed: bool = False,
        errors: bool = False,
        logmsg: t.Union[str, None] = None,
    ) -> None:
        """End the task and record the current status

        :param completed: Did the job complete successfully?
        :param errors: Were errors encountered doing the job?
        :param logmsg: A message to append to the task logs (skipped if empty)

        :raises FatalError: if the tracking doc cannot be updated
        """
        self.end_time = now_iso8601()
        self.completed = completed
        self.errors = errors
        if logmsg:
            self.add_log(logmsg)
        self.record()
        self.logger.info(
            'Job: %s, task: %s ended. Completed: %s',
            self.job.name,
            self.task_id,
            completed,
        )

    def update_status(self) -> None:
        """Rebuild self.status from the ATTRLIST attributes that are not None"""
        contents = {}
        for key in self.ATTRLIST:
            value = getattr(self, key)
            if value is not None:
                contents[key] = value
        self.status = contents

    def build_doc(self) -> t.Dict:
        """Build the dictionary which will be written to the tracking doc

        self.status is refreshed from the instance attributes first.

        :returns: The tracking doc dictionary
        """
        self.update_status()
        doc = {key: self.status[key] for key in self.ATTRLIST if key in self.status}
        if self.index:
            # For the PRE check, there is no value here, so let's not add a null field.
            doc['index'] = self.index
        doc['job'] = self.job.name
        doc['task'] = self.task_id
        doc['join_field'] = {'name': 'task', 'parent': self.job.name}
        doc['dry_run'] = self.job.dry_run
        return doc

    def record(self) -> None:
        """Record the current status of the task

        :raises FatalError: if updating the tracking doc fails for any reason
        """
        doc = self.build_doc()
        try:
            update_doc(
                self.job.client, self.job.index, self.doc_id, doc  # type: ignore
            )
        except Exception as exc:
            raise FatalError(self._log_fatal(exc), exc) from exc

    def finished(self) -> bool:
        """
        Check if a prior run was recorded for this task and log accordingly

        :returns: True when the task is already marked completed and the
            current job is not a dry run; otherwise False
        """
        if self.completed:
            if self.job.dry_run:
                # A dry run ignores the earlier completion and carries on
                self.logger.info(
                    'DRY-RUN: Ignoring previous run of job: %s, task %s',
                    self.job.name,
                    self.task_id,
                )
            else:
                self.logger.info(
                    'Job: %s, task: %s was completed previously.',
                    self.job.name,
                    self.task_id,
                )
                return True
        if self.start_time:
            self.report_history()
            self.logger.warning('Restarting or resuming task: %s', self.task_id)
        return False