Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/redacters/snapshot.py: 86%
80 statements
« prev ^ index » next coverage.py v7.5.0, created at 2025-03-17 23:49 -0600
« prev ^ index » next coverage.py v7.5.0, created at 2025-03-17 23:49 -0600
1"""Redact data from a snapshot mounted index"""
3import typing as t
4import logging
5from datetime import datetime
6from dotmap import DotMap # type: ignore
7from es_pii_tool.exceptions import FatalError
8from es_pii_tool.trackables import Task
9from es_pii_tool.helpers import elastic_api as api
10from es_pii_tool.helpers.utils import (
11 get_inc_version,
12 strip_index_name,
13)
14from es_pii_tool.redacters.steps import RedactionSteps
16if t.TYPE_CHECKING:
17 from elasticsearch8 import Elasticsearch
18 from es_pii_tool.job import Job
20logger = logging.getLogger(__name__)
class RedactSnapshot:
    """Redact PII from indices mounted as searchable snapshots

    An instance wraps one index: it creates a :py:class:`Task` for it, collects
    the derived names and settings in :py:attr:`var`, and :py:meth:`run` hands
    both to :py:class:`RedactionSteps`.
    """

    # Class-level default so that reading :py:attr:`success` before
    # :py:meth:`run` has assigned it yields ``False`` instead of raising
    # AttributeError.
    _success: bool = False

    def __init__(self, index: str, job: 'Job', phase: str):
        """
        :param index: Name of the mounted index to redact
        :param job: The job this redaction belongs to. Its ``client`` attribute is
            stored in :py:attr:`var`.
        :param phase: The phase of the index. ``cold`` and ``frozen`` select a mount
            prefix and storage type; see :py:meth:`_get_mapped_vars`.

        :raises FatalError: if the :py:class:`Task` cannot be created
        """
        self.index = index
        self.phase = phase
        try:
            self.task = Task(job, index=index, id_suffix='REDACT-SNAPSHOT')
        except Exception as exc:
            # Any failure to create the task is re-raised as FatalError, with the
            # original exception chained as the cause.
            logger.critical('Unable to create task: %s', exc)
            raise FatalError('Unable to create task', exc) from exc
        self.var = DotMap()
        self._buildvar(job.client, index, phase)

    def _buildvar(self, client: 'Elasticsearch', index: str, phase: str) -> None:
        """Populate :py:attr:`var` with the values we need to start with

        :param client: Client connection object, stored as ``var.client``
        :param index: The index name, stored as ``var.index``
        :param phase: The phase, stored as ``var.phase``
        """
        self.var.client = client
        self.var.index = index
        self.var.phase = phase
        # Sets var.prefix and var.storage, which mount_name below depends on
        self._get_mapped_vars(phase)
        # Removes prefixes and suffixes
        self.var.og_name = strip_index_name(index)
        # datetime.now() without a tz argument is local, naive time. The timestamp
        # is only used to build a name.
        now = datetime.now()
        self.var.redaction_target = (
            f'redacted-{now.strftime("%Y%m%d%H%M%S")}-{self.var.og_name}'
        )
        self.var.new_snap_name = f'{self.var.redaction_target}-snap'
        # Check if the old index has been redacted before and has a version number
        self.var.ver = get_inc_version(index)
        # The mount name contains a version at the end in case we need to redact
        # the index again. The version allows us to use a similar naming scheme
        # without redundancy. It is ver + 1, zero-padded to three digits.
        self.var.mount_name = (
            f'{self.var.prefix}redacted-{self.var.og_name}---v{self.var.ver + 1:03}'
        )
        logger.debug('mount_name = %s', self.var.mount_name)

    def _get_mapped_vars(self, phase: str) -> None:
        """Set ``var.prefix`` and ``var.storage`` according to ``phase``

        ``cold`` maps to ``restored-`` / ``full_copy`` and ``frozen`` maps to
        ``partial-`` / ``shared_cache``. Any other value leaves both as empty
        strings.

        :param phase: The phase of the index
        """
        self.var.prefix = ''
        self.var.storage = ''
        if phase == 'cold':
            self.var.prefix = 'restored-'
            self.var.storage = 'full_copy'
        elif phase == 'frozen':
            self.var.prefix = 'partial-'
            self.var.storage = 'shared_cache'

    def get_index_deets(self) -> None:
        """Read searchable snapshot values from deeply nested index settings

        Stores the index aliases as ``var.aliases`` and, from the
        ``settings.index.store.snapshot`` section of the response, the repository
        name (``var.repository``), snapshot name (``var.ss_snap``) and index name
        (``var.ss_idx``).

        No lookup is guarded here, so a response that lacks any of these keys
        propagates the resulting lookup error to the caller.
        """
        response = api.get_index(self.var.client, self.var.index)
        logger.debug('Found indices: %s', list(response.keys()))
        self.var.aliases = DotMap(response[self.var.index]['aliases'])
        snap_data = response[self.var.index]['settings']['index']['store']['snapshot']
        self.var.repository = snap_data['repository_name']
        self.var.ss_snap = snap_data['snapshot_name']
        self.var.ss_idx = snap_data['index_name']
        logger.debug('ss_idx = %s', self.var.ss_idx)

    @property
    def success(self) -> bool:
        """
        :getter: Get the success state
        :setter: Set the success state
        :type: bool
        """
        return self._success

    @success.setter
    def success(self, value: bool) -> None:
        self._success = value

    def run(self) -> None:
        """Do the actual run

        If the task is already finished, mark success and return. Otherwise begin
        the task, read the job's ``restore_settings`` and the index details, and
        run the :py:class:`RedactionSteps`.

        When the job is not a dry run, the task is ended and :py:attr:`success`
        is set to ``True``. On a dry run the task logs are prefixed with
        ``DRY-RUN || `` and :py:attr:`success` is set to ``False``.
        """
        if self.task.finished():
            self.success = True
            return
        # Log task start time
        self.task.begin()
        logger.info("Getting index info: %s", self.index)
        self.var.restore_settings = DotMap(self.task.job.config['restore_settings'])
        self.get_index_deets()

        steps = RedactionSteps(self.task, self.var)
        steps.run()

        if not self.task.job.dry_run:
            msg = f'Index {self.index} has completed all steps.'
            logger.info(msg)
            self.task.add_log(msg)
            self.task.end(True, errors=False)
            self.success = True
            return
        # Implied else (meaning it is a dry run): the task is not ended, and its
        # logs are replaced by their DRY-RUN-prefixed string form.
        self.success = False
        self.task.logs = f'DRY-RUN || {self.task.logs}'