Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/trackables.py: 79%
197 statements
« prev ^ index » next coverage.py v7.5.0, created at 2025-03-17 21:54 -0600
« prev ^ index » next coverage.py v7.5.0, created at 2025-03-17 21:54 -0600
1"""Functions for creating & updating the progress/status update doc in Elasticsearch"""
3import typing as t
4import logging
5from es_pii_tool.exceptions import FatalError, MissingArgument, MissingDocument
6from es_pii_tool.helpers.elastic_api import get_progress_doc, update_doc
7from es_pii_tool.helpers.utils import now_iso8601
9if t.TYPE_CHECKING:
10 from es_pii_tool.job import Job
13MOD = __name__
15# pylint: disable=R0902,W0707
class Trackable:
    """An individual task or step, tracked in Elasticsearch

    Base class holding the state shared by the trackable subclasses in this
    module. The attributes named in ``ATTRLIST`` are the ones copied between
    the instance and the progress/status update doc.
    """

    # Names of the attributes persisted to, and restored from, the tracking doc
    ATTRLIST = ['start_time', 'completed', 'end_time', 'errors', 'logs']

    def __init__(
        self,
        job: t.Optional['Job'] = None,
        index: str = '',
    ):
        """
        :param job: The job this trackable belongs to
        :param index: The index this trackable is associated with, if any
        """
        self.logger = logging.getLogger(f'{MOD}.{self.__class__.__name__}')
        self.stub = ''
        if job is not None:
            self.job = job
            self.stub = f'Job: {job.name}'
        self.index = index
        self.task_id = ''
        self.stepname = ''
        self.doc_id = None
        # Give every property a backing value so that reading one (or calling
        # add_log) before load_status() has run cannot raise AttributeError.
        self._status: t.Dict = {}
        self._start_time: t.Optional[str] = None
        self._end_time: t.Optional[str] = None
        self._completed: t.Optional[bool] = None
        self._errors: t.Optional[bool] = None
        self._logs: t.Optional[t.Sequence[str]] = None

    @property
    def status(self) -> t.Dict:
        """
        The status of the current trackable, or retrieved from a previous
        incomplete trackable
        """
        return self._status

    @status.setter
    def status(self, value: t.Dict) -> None:
        self._status = value

    @property
    def start_time(self) -> t.Optional[str]:
        """The ISO8601 string representing the start time of this trackable"""
        return self._start_time

    @start_time.setter
    def start_time(self, value: t.Optional[str]) -> None:
        self._start_time = value

    @property
    def end_time(self) -> t.Optional[str]:
        """The ISO8601 string representing the end time of this trackable"""
        return self._end_time

    @end_time.setter
    def end_time(self, value: t.Optional[str]) -> None:
        self._end_time = value

    @property
    def completed(self) -> t.Optional[bool]:
        """Is the trackable completed? or Did the trackable complete successfully?"""
        return self._completed

    @completed.setter
    def completed(self, value: t.Optional[bool]) -> None:
        self._completed = value

    @property
    def errors(self) -> t.Optional[bool]:
        """Were errors encountered during this trackable?"""
        return self._errors

    @errors.setter
    def errors(self, value: t.Optional[bool]) -> None:
        self._errors = value

    @property
    def logs(self) -> t.Optional[t.Sequence[str]]:
        """The list of log lines collected during this trackable"""
        return self._logs

    @logs.setter
    def logs(self, value: t.Optional[t.Sequence[str]]) -> None:
        self._logs = value

    def _fatal(self, exc: Exception) -> Exception:
        """Log ``exc`` as critical and build the ``FatalError`` that wraps it

        :param exc: The exception that was caught
        :returns: A ``FatalError`` for the caller to raise
        """
        # An exception raised without arguments has an empty ``args`` tuple, so
        # fall back to its repr rather than failing with IndexError here.
        detail = exc.args[0] if exc.args else repr(exc)
        msg = f'Fatal error encountered: {detail}'
        self.logger.critical(msg)
        return FatalError(msg, exc)

    def add_log(self, value: str) -> None:
        """Append another timestamped entry to self.logs

        :param value: The log message to record
        """
        # A non-empty list is extended in place; None or an empty list is
        # replaced by a fresh one.
        entries = self.logs or []
        entries.append(f'{now_iso8601()} {value}')
        self.logs = entries

    def load_status(self) -> None:
        """Load prior status values (or not)

        Each attribute in ATTRLIST is set from self.status, or to None when the
        key is absent. If the last run was a dry run, every attribute is set to
        None regardless of what self.status holds.
        """
        for key in self.ATTRLIST:
            if self.job.prev_dry_run:
                setattr(self, key, None)
            else:
                setattr(self, key, self.status.get(key))

    def get_trackable(self) -> t.Dict:
        """
        Get any history that may exist for self.stepname of self.task_id of
        self.job.name

        Sets self.doc_id when a tracking doc is found.

        :returns: The ``_source`` of the progress/status update doc, or an empty
            dict if the doc does not exist yet
        :raises FatalError: on any error other than a missing document
        """
        try:
            retval = get_progress_doc(
                self.job.client,
                self.job.index,
                self.job.name,
                self.task_id,
                stepname=self.stepname,
            )
        except MissingDocument:
            self.logger.debug('Doc tracking %s does not exist yet', self.stub)
            return {}
        except Exception as exc:
            raise self._fatal(exc) from exc
        self.doc_id = retval['_id']
        return retval['_source']

    def get_history(self) -> None:
        """
        Get the history of self.stepname, if any. Ensure all values are populated
        from the doc, or None
        """
        self.logger.debug('Pulling any history for %s', self.stub)
        self.status = self.get_trackable()
        if not self.status:
            self.logger.debug('No history for %s', self.stub)
        self.load_status()

    def report_history(self) -> None:
        """
        Get the history of any prior attempt to run self.task_id of self.job.name
        Log aspects of the history here.
        """
        prefix = f'The prior run of {self.stub}'
        if self.start_time:
            self.logger.info('%s started at %s', prefix, self.start_time)
        if self.completed:
            if self.end_time:
                self.logger.info('%s completed at %s', prefix, self.end_time)
            else:
                msg = 'is marked completed but did not record an end time'
                self.logger.warning(
                    '%s started at %s and %s', prefix, self.start_time, msg
                )
        if self.errors:
            self.logger.warning('%s encountered errors.', prefix)
            if self.logs:
                # Only report the logs if errors is True
                self.logger.warning('%s had log(s): %s', prefix, self.logs)

    def begin(self) -> None:
        """Begin the step and record the current status"""
        self.logger.info('Beginning %s', self.stub)
        if self.job.dry_run:
            msg = 'DRY-RUN: No changes will be made'
            self.logger.info(msg)
            self.add_log(msg)
        self.start_time = now_iso8601()
        self.completed = False
        self.record()
        if not self.doc_id:
            # Called for its side effect of setting self.doc_id; the returned
            # document source is discarded.
            self.get_trackable()
            self.load_status()
        self.logger.debug('self.doc_id = %s', self.doc_id)

    def end(
        self,
        completed: bool = False,
        errors: bool = False,
        logmsg: t.Union[str, None] = None,
    ) -> None:
        """End the step and record the current status

        :param completed: Did the step complete successfully?
        :param errors: Were errors encountered doing the step?
        :param logmsg: A log message to append to self.logs, if provided
        """
        self.end_time = now_iso8601()
        self.completed = completed
        self.errors = errors
        if logmsg:
            self.add_log(logmsg)
        self.record()
        self.logger.info('%s ended. Completed: %s', self.stub, completed)

    def update_status(self) -> None:
        """Rebuild self.status from the ATTRLIST attributes that are not None"""
        contents = {}
        for name in self.ATTRLIST:
            value = getattr(self, name)
            if value is not None:
                contents[name] = value
        self.status = contents

    def build_doc(self) -> t.Dict:
        """Build the dictionary which will be written to the tracking doc

        :returns: The tracking doc dictionary
        """
        self.update_status()
        doc = {key: self.status[key] for key in self.ATTRLIST if key in self.status}
        # Only add this field if self.index is not empty/None
        if self.index:
            doc['index'] = self.index
        # Only add this field if self.stepname is not empty/None
        if self.stepname:
            doc['step'] = self.stepname
        # Only add this field if self.task_id is not empty/None
        if self.task_id:
            doc['task'] = self.task_id  # Necessary for the parent-child relationship
        doc['job'] = self.job.name
        doc['dry_run'] = self.job.dry_run
        return doc

    def record(self) -> None:
        """Record the current status of the task

        :raises FatalError: if the tracking doc could not be updated
        """
        doc = self.build_doc()
        try:
            update_doc(
                self.job.client, self.job.index, self.doc_id, doc  # type: ignore
            )
        except Exception as exc:
            raise self._fatal(exc) from exc

    def finished(self) -> bool:
        """
        Check if a prior run was recorded for this step and log accordingly

        :returns: True if a prior run completed and this is not a dry run,
            otherwise False
        """
        if self.completed:
            if self.job.dry_run:
                self.logger.info('DRY-RUN: Ignoring previous run of %s', self.stub)
            else:
                self.logger.info('%s was completed previously.', self.stub)
                return True
        if self.start_time:
            self.report_history()
            self.logger.warning('%s was not completed in a previous run.', self.stub)
        return False
class Task(Trackable):
    """A single task belonging to a job, tracked in Elasticsearch"""

    def __init__(
        self,
        job: t.Optional['Job'] = None,
        index: str = '',
        id_suffix: str = '',
        task_id: str = '',
    ):
        """
        :param job: The job this task belongs to (required)
        :param index: The index this task operates on
        :param id_suffix: Joined to ``index`` to form the task id when no
            ``task_id`` is given
        :param task_id: An explicit task id; takes precedence over ``index``
            and ``id_suffix``
        :raises MissingArgument: if ``job`` is None, or if neither ``task_id``
            nor both ``index`` and ``id_suffix`` are provided
        """
        super().__init__(job=job, index=index)
        self.logger = logging.getLogger(f'{MOD}.{self.__class__.__name__}')
        if job is None:
            raise MissingArgument('job', 'keyword argument', 'job')
        if not task_id:
            # Without an explicit id, both parts of the derived id are needed
            absent = [
                name
                for name, given in (('index', index), ('id_suffix', id_suffix))
                if not given
            ]
            if absent:
                raise MissingArgument(
                    'task_id, or both index and id_suffix must be provided',
                    'keyword argument(s)',
                    ['task_id', *absent],
                )
            task_id = f'{index}---{id_suffix}'
        self.task_id = task_id
        self.index = index
        self.stub = f'Task: {self.task_id} of Job: {self.job.name}'
        self.doc_id = None
        # Pull in whatever a previous run recorded for this task
        self.get_history()
class Step(Trackable):
    """A single step belonging to a task, tracked in Elasticsearch"""

    def __init__(
        self,
        job: t.Optional['Job'] = None,
        task: t.Optional[Task] = None,
        index: str = '',
        stepname: str = '',
    ):
        """
        :param job: Passed through to the base class; the job actually used is
            the one attached to ``task``
        :param task: The task this step belongs to (required)
        :param index: The index this step operates on
        :param stepname: The name of this step (required)
        :raises MissingArgument: if ``task`` is None or ``stepname`` is empty
        """
        super().__init__(job=job, index=index)
        self.logger = logging.getLogger(f'{MOD}.{self.__class__.__name__}')
        if task is None:
            raise MissingArgument('task', 'keyword argument', 'task')
        if not stepname:
            raise MissingArgument(
                'stepname must be provided',
                'keyword argument(s)',
                'stepname',
            )
        # Identity comes from the parent task, overriding anything the base
        # class derived from ``job``
        self.task_id = task.task_id
        self.job = task.job
        self.index = index
        self.stepname = stepname
        job_name = task.job.name
        self.stub = f'Step: {stepname} of Task: {self.task_id} of Job: {job_name}'
        self.doc_id = None
        # Pull in whatever a previous run recorded for this step
        self.get_history()