Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/task.py: 78%
166 statements
« prev ^ index » next coverage.py v7.5.0, created at 2025-01-29 19:21 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2025-01-29 19:21 -0700
1"""Functions for creating & updating the progress/status update doc in Elasticsearch"""
3import typing as t
4import logging
5from es_pii_tool.exceptions import FatalError, MissingArgument, MissingDocument
6from es_pii_tool.helpers.elastic_api import get_task_doc, update_doc
7from es_pii_tool.helpers.utils import now_iso8601
9if t.TYPE_CHECKING:
10 from es_pii_tool.job import Job
# Module path, used as the prefix for the per-class logger name in Task.__init__
MOD = __name__
14# pylint: disable=R0902,W0707
class Task:
    """An individual task item, tracked in Elasticsearch

    A task is identified by ``task_id`` and belongs to ``job``. Its state (the
    attributes named in :py:attr:`ATTRLIST`) is read from, and written back to, a
    tracking document via ``get_task_doc`` and ``update_doc``.
    """

    # Names of the instance attributes which are persisted in the tracking doc
    ATTRLIST = ['start_time', 'completed', 'end_time', 'errors', 'logs']

    def __init__(
        self,
        job: 'Job',
        index: t.Union[str, None] = None,
        id_suffix: t.Union[str, None] = None,
        task_id: t.Union[str, None] = None,
    ):
        """Identify the task and load any prior history for it

        :param job: The job this task belongs to
        :param index: The index this task acts on (may be empty if ``task_id`` is
            provided)
        :param id_suffix: Combined with ``index`` to build the task id
        :param task_id: An explicit task id. Takes precedence over ``index`` and
            ``id_suffix``

        :raises MissingArgument: if ``task_id`` is not provided and either of
            ``index`` or ``id_suffix`` is missing or empty
        """
        self.logger = logging.getLogger(f'{MOD}.{self.__class__.__name__}')
        self.job = job
        if task_id:
            self.task_id = task_id
        elif index and id_suffix:
            self.task_id = f'{index}---{id_suffix}'
        else:
            # Report every argument that failed the truthiness test above, so an
            # empty string is named in the error just like a None would be.
            missing = ['task_id']
            if not id_suffix:
                missing.append('id_suffix')
            if not index:
                missing.append('index')
            raise MissingArgument(
                'task_id, or both index and id_suffix must be provided',
                'keyword argument(s)',
                missing,
            )
        self.index = index
        # Populated by get_task() once the tracking doc is known to exist
        self.doc_id: t.Union[str, None] = None
        self.get_history()

    @property
    def status(self) -> t.Dict:
        """
        The status of the current task, or retrieved from a previous incomplete task
        """
        return self._status

    @status.setter
    def status(self, value: t.Dict) -> None:
        self._status = value

    @property
    def start_time(self) -> str:
        """The ISO8601 string representing the start time of this task"""
        return self._start_time

    @start_time.setter
    def start_time(self, value: str) -> None:
        self._start_time = value

    @property
    def end_time(self) -> str:
        """The ISO8601 string representing the end time of this task"""
        return self._end_time

    @end_time.setter
    def end_time(self, value: str) -> None:
        self._end_time = value

    @property
    def completed(self) -> bool:
        """Is the task completed? or Did the task complete successfully?"""
        return self._completed

    @completed.setter
    def completed(self, value: bool) -> None:
        self._completed = value

    @property
    def errors(self) -> bool:
        """Were errors encountered during this task?"""
        return self._errors

    @errors.setter
    def errors(self, value: bool) -> None:
        self._errors = value

    @property
    def logs(self) -> t.Sequence[str]:
        """The list of log lines collected during this task"""
        return self._logs

    @logs.setter
    def logs(self, value: t.Sequence[str]) -> None:
        self._logs = value

    @staticmethod
    def _fatal_message(exc: BaseException) -> str:
        """Build the message used when wrapping ``exc`` in a FatalError

        :param exc: The exception being wrapped

        :returns: The message, using the first exception argument when there is
            one, or the repr of the exception when it carries no arguments
        """
        # exc.args may be empty (e.g. ``raise ValueError``); indexing it blindly
        # would raise IndexError and hide the original failure.
        detail = exc.args[0] if exc.args else repr(exc)
        return f'Fatal error encountered: {detail}'

    def add_log(self, value: str) -> None:
        """Append another timestamped entry to self.logs

        :param value: The log text. It is prefixed with the current ISO8601 time.
        """
        entry = f'{now_iso8601()} {value}'
        # Build a new list rather than appending in place: self.logs may be None,
        # any Sequence, or the same object held in self.status['logs'].
        self.logs = [*(self.logs or []), entry]

    def load_status(self) -> None:
        """Load prior status values (or not)

        Each attribute in ATTRLIST is set from self.status, or to None if the key
        is absent. If the last run of the job was a dry run, every attribute is set
        to None regardless of what self.status holds.
        """
        prior = {} if self.job.prev_dry_run else self.status
        for key in self.ATTRLIST:
            setattr(self, key, prior.get(key))

    def get_task(self) -> t.Dict:
        """Get any task history that may exist for self.job.name and self.task_id

        On success, self.doc_id is set to the ``_id`` of the tracking doc.

        :returns: The task object from the progress/status update doc, or an empty
            dictionary if no such doc exists yet

        :raises FatalError: if retrieving the doc fails for any reason other than
            the doc being missing
        """
        try:
            retval = get_task_doc(
                self.job.client, self.job.index, self.job.name, self.task_id
            )
        except MissingDocument:
            self.logger.debug(
                'Doc tracking job: %s, task: %s does not exist yet',
                self.job.name,
                self.task_id,
            )
            return {}
        except Exception as exc:
            msg = self._fatal_message(exc)
            self.logger.critical(msg)
            raise FatalError(msg, exc) from exc
        self.doc_id = retval['_id']
        return retval['_source']

    def get_history(self) -> None:
        """
        Get the history of a taskid, if any. Ensure all values are populated from the
        doc, or None
        """
        self.logger.debug('Pulling any history for task: %s', self.task_id)
        self.status = self.get_task()
        if not self.status:
            self.logger.debug(
                'No history for job: %s, task: %s', self.job.name, self.task_id
            )
        self.load_status()

    def report_history(self) -> None:
        """
        Get the history of any prior attempt to run self.task_id of self.job.name
        Log aspects of the history here.
        """
        prefix = f'The prior run of job: {self.job.name}, task: {self.task_id}'
        if self.start_time:
            self.logger.info('%s started at %s', prefix, self.start_time)
        if self.completed:
            if self.end_time:
                self.logger.info('%s completed at %s', prefix, self.end_time)
            else:
                msg = 'is marked completed but did not record an end time'
                self.logger.warning(
                    '%s started at %s and %s', prefix, self.start_time, msg
                )
        if self.errors:
            self.logger.warning('%s encountered errors.', prefix)
            # Only report the logs if errors is True
            if self.logs:
                self.logger.warning('%s had log(s): %s', prefix, self.logs)

    def begin(self) -> None:
        """Begin the task and record the current status

        :raises FatalError: if the tracking doc cannot be written or re-read
        """
        self.logger.info('Beginning job: %s, task: %s', self.job.name, self.task_id)
        if self.job.dry_run:
            msg = 'DRY-RUN: No changes will be made'
            self.logger.info(msg)
            self.add_log(msg)
        self.start_time = now_iso8601()
        self.completed = False
        self.record()
        if not self.doc_id:
            # Called for its side effect: get_task() sets self.doc_id when the doc
            # written by record() can be found.
            # NOTE(review): the nesting of these two calls under this condition is
            # reconstructed from a flattened listing -- confirm against the repo.
            self.get_task()
            self.load_status()
        self.logger.debug('self.doc_id = %s', self.doc_id)

    def end(
        self,
        completed: bool = False,
        errors: bool = False,
        logmsg: t.Union[str, None] = None,
    ) -> None:
        """End the task and record the current status

        :param completed: Did the job complete successfully?
        :param errors: Were errors encountered doing the job?
        :param logmsg: A message to append to the task logs, if provided

        :raises FatalError: if the tracking doc cannot be written
        """
        self.end_time = now_iso8601()
        self.completed = completed
        self.errors = errors
        if logmsg:
            self.add_log(logmsg)
        self.record()
        self.logger.info(
            'Job: %s, task: %s ended. Completed: %s',
            self.job.name,
            self.task_id,
            completed,
        )

    def update_status(self) -> None:
        """Rebuild self.status from the current non-None ATTRLIST attribute values"""
        current = ((key, getattr(self, key)) for key in self.ATTRLIST)
        self.status = {key: val for key, val in current if val is not None}

    def build_doc(self) -> t.Dict:
        """Build the dictionary which will be written to the tracking doc

        self.status is refreshed from the current attribute values first.

        :returns: The tracking doc dictionary
        """
        self.update_status()
        doc = {key: self.status[key] for key in self.ATTRLIST if key in self.status}
        if self.index:
            # For the PRE check, there is no value here, so let's not add a null field.
            doc['index'] = self.index
        doc['job'] = self.job.name
        doc['task'] = self.task_id
        doc['join_field'] = {'name': 'task', 'parent': self.job.name}
        doc['dry_run'] = self.job.dry_run
        return doc

    def record(self) -> None:
        """Record the current status of the task

        :raises FatalError: if writing the tracking doc fails for any reason
        """
        doc = self.build_doc()
        try:
            update_doc(
                self.job.client, self.job.index, self.doc_id, doc  # type: ignore
            )
        except Exception as exc:
            msg = self._fatal_message(exc)
            self.logger.critical(msg)
            raise FatalError(msg, exc) from exc

    def finished(self) -> bool:
        """
        Check if a prior run was recorded for this task and log accordingly

        :returns: True if a prior run completed and this is not a dry run. False
            otherwise, including when a prior run started but did not complete, in
            which case its history is logged first.
        """
        if self.completed:
            if not self.job.dry_run:
                self.logger.info(
                    'Job: %s, task: %s was completed previously.',
                    self.job.name,
                    self.task_id,
                )
                return True
            # A dry run never treats prior work as done; fall through.
            self.logger.info(
                'DRY-RUN: Ignoring previous run of job: %s, task %s',
                self.job.name,
                self.task_id,
            )
        if self.start_time:
            self.report_history()
            self.logger.warning('Restarting or resuming task: %s', self.task_id)
        return False