Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/redacters/snapshot.py: 89%
75 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-10-01 16:39 -0600
« prev ^ index » next coverage.py v7.5.0, created at 2024-10-01 16:39 -0600
1"""Redact data from a snapshot mounted index"""
3import typing as t
4import logging
5from datetime import datetime
6from dotmap import DotMap # type: ignore
7from es_pii_tool.task import Task
8from es_pii_tool.helpers import elastic_api as api
9from es_pii_tool.helpers.utils import (
10 get_inc_version,
11 strip_index_name,
12)
13from es_pii_tool.redacters.steps import RedactionSteps
15if t.TYPE_CHECKING:
16 from elasticsearch8 import Elasticsearch
17 from es_pii_tool.job import Job
19logger = logging.getLogger(__name__)
class RedactSnapshot:
    """Redact PII from indices mounted as searchable snapshots"""

    # Class-level default so that reading ``success`` before ``run()`` has
    # assigned it returns False instead of raising AttributeError.
    _success: bool = False

    def __init__(self, index: str, job: 'Job', phase: str):
        """
        :param index: Name of the mounted index to redact
        :param job: The job this redaction belongs to; its ``client`` is used
            for API calls and it is handed to the :py:class:`Task`
        :param phase: The phase name used to pick the mount prefix and storage
            type (``cold`` and ``frozen`` are the recognized values)
        """
        self.index = index
        self.phase = phase
        self.task = Task(job, index=index, id_suffix='REDACT-SNAPSHOT')
        self.var = DotMap()
        self._buildvar(job.client, index, phase)

    def _buildvar(self, client: 'Elasticsearch', index: str, phase: str):
        """Populate :py:attr:`var` with the values we need to start with"""
        self.var.client = client
        self.var.index = index
        self.var.phase = phase
        # Sets var.prefix and var.storage, which mount_name below depends on
        self._get_mapped_vars(phase)
        self.var.og_name = strip_index_name(index)  # Removes prefixes and suffixes
        now = datetime.now()
        self.var.redaction_target = (
            f'redacted-{now.strftime("%Y%m%d%H%M%S")}-{self.var.og_name}'
        )
        self.var.new_snap_name = f'{self.var.redaction_target}-snap'
        # Check if the old index has been redacted before and has a version number
        self.var.ver = get_inc_version(index)
        # The mount name contains a version at the end in case we need to redact
        # the index again. The version allows us to use a similar naming scheme
        # without redundancy
        self.var.mount_name = (
            f'{self.var.prefix}redacted-{self.var.og_name}---v{self.var.ver + 1:03}'
        )
        logger.debug('mount_name = %s', self.var.mount_name)

    def _get_mapped_vars(self, phase: str):
        """
        Set ``var.prefix`` and ``var.storage`` according to ``phase``

        ``cold`` maps to ``restored-`` / ``full_copy`` and ``frozen`` maps to
        ``partial-`` / ``shared_cache``. Any other value leaves both as empty
        strings.
        """
        self.var.prefix = ''
        self.var.storage = ''
        if phase == 'cold':
            self.var.prefix = 'restored-'
            self.var.storage = 'full_copy'
        elif phase == 'frozen':
            self.var.prefix = 'partial-'
            self.var.storage = 'shared_cache'

    def get_index_deets(self):
        """Return searchable snapshot values from deeply nested index settings"""
        response = api.get_index(self.var.client, self.var.index)
        logger.debug('Found indices: %s', list(response.keys()))
        self.var.aliases = DotMap(response[self.var.index]['aliases'])
        # NOTE(review): these lookups are unguarded; presumably an index that is
        # not a searchable snapshot mount lacks the 'store'/'snapshot' keys and
        # this would raise -- confirm against callers.
        snap_data = response[self.var.index]['settings']['index']['store']['snapshot']
        self.var.repository = snap_data['repository_name']
        self.var.ss_snap = snap_data['snapshot_name']
        self.var.ss_idx = snap_data['index_name']
        logger.debug('ss_idx = %s', self.var.ss_idx)

    @property
    def success(self) -> bool:
        """
        :getter: Get the success state
        :setter: Set the success state
        :type: bool
        """
        return self._success

    @success.setter
    def success(self, value: bool) -> None:
        self._success = value

    def run(self):
        """
        Do the actual run

        If the task is already finished, mark success and return without doing
        anything else. Otherwise begin the task, collect the index details and
        run the redaction steps. A non-dry-run ends the task and sets
        :py:attr:`success` to ``True``; a dry run prefixes the task logs with
        ``DRY-RUN || `` and sets :py:attr:`success` to ``False``.
        """
        if self.task.finished():
            self.success = True
            return
        # Log task start time
        self.task.begin()
        logger.info("Getting index info: %s", self.index)
        self.var.restore_settings = DotMap(self.task.job.config['restore_settings'])
        self.get_index_deets()

        steps = RedactionSteps(self.task, self.var)
        steps.run()

        if not self.task.job.dry_run:
            msg = f'Index {self.index} has completed all steps.'
            logger.info(msg)
            self.task.add_log(msg)
            self.task.end(True, errors=False)
            self.success = True
            return
        # Implied else (meaning it is a dry run)
        dry_run_logs = f'DRY-RUN || {self.task.logs}'
        self.success = False
        self.task.logs = dry_run_logs