Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/redacters/snapshot.py: 86%

80 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2025-03-17 23:49 -0600

1"""Redact data from a snapshot mounted index""" 

2 

3import typing as t 

4import logging 

5from datetime import datetime 

6from dotmap import DotMap # type: ignore 

7from es_pii_tool.exceptions import FatalError 

8from es_pii_tool.trackables import Task 

9from es_pii_tool.helpers import elastic_api as api 

10from es_pii_tool.helpers.utils import ( 

11 get_inc_version, 

12 strip_index_name, 

13) 

14from es_pii_tool.redacters.steps import RedactionSteps 

15 

16if t.TYPE_CHECKING: 

17 from elasticsearch8 import Elasticsearch 

18 from es_pii_tool.job import Job 

19 

20logger = logging.getLogger(__name__) 

21 

22 

class RedactSnapshot:
    """Redact PII from indices mounted as searchable snapshots"""

    # Phase -> (mount-name prefix, storage type). Phases not listed here map
    # to empty strings for both values.
    _PHASE_MAP: t.Dict[str, t.Tuple[str, str]] = {
        'cold': ('restored-', 'full_copy'),
        'frozen': ('partial-', 'shared_cache'),
    }

    # Default success state. Declared at class level so that reading
    # ``success`` before ``run()`` has assigned it (or after ``run()`` raised
    # part-way through) yields ``False`` rather than an AttributeError.
    _success: bool = False

    def __init__(self, index: str, job: 'Job', phase: str):
        """
        :param index: Name of the mounted index to redact
        :param job: The job this redaction belongs to; supplies the client
        :param phase: The phase value used to pick the mount prefix and
            storage type (``cold`` and ``frozen`` are recognized)

        :raises FatalError: if the tracking task cannot be created
        """
        self.index = index
        self.phase = phase
        self._success = False
        try:
            self.task = Task(job, index=index, id_suffix='REDACT-SNAPSHOT')
        except Exception as exc:
            logger.critical('Unable to create task: %s', exc)
            raise FatalError('Unable to create task', exc) from exc
        self.var = DotMap()
        self._buildvar(job.client, index, phase)

    def _buildvar(self, client: 'Elasticsearch', index: str, phase: str):
        """Populate :py:attr:`var` with the values we need to start with"""
        self.var.client = client
        self.var.index = index
        self.var.phase = phase
        self._get_mapped_vars(phase)
        self.var.og_name = strip_index_name(index)  # Removes prefixes and suffixes
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        self.var.redaction_target = f'redacted-{timestamp}-{self.var.og_name}'
        self.var.new_snap_name = f'{self.var.redaction_target}-snap'
        # Check if the old index has been redacted before and has a version number
        self.var.ver = get_inc_version(index)
        # The mount name contains a version at the end in case we need to redact
        # the index again. The version allows us to use a similar naming scheme
        # without redundancy
        self.var.mount_name = (
            f'{self.var.prefix}redacted-{self.var.og_name}---v{self.var.ver + 1:03}'
        )
        logger.debug('mount_name = %s', self.var.mount_name)

    def _get_mapped_vars(self, phase: str):
        """
        Set ``var.prefix`` and ``var.storage`` according to ``phase``

        Any phase other than ``cold`` or ``frozen`` leaves both values as
        empty strings.
        """
        prefix, storage = self._PHASE_MAP.get(phase, ('', ''))
        self.var.prefix = prefix
        self.var.storage = storage

    def get_index_deets(self):
        """Return searchable snapshot values from deeply nested index settings"""
        response = api.get_index(self.var.client, self.var.index)
        logger.debug('Found indices: %s', list(response.keys()))
        index_data = response[self.var.index]
        self.var.aliases = DotMap(index_data['aliases'])
        snap_data = index_data['settings']['index']['store']['snapshot']
        self.var.repository = snap_data['repository_name']
        self.var.ss_snap = snap_data['snapshot_name']
        self.var.ss_idx = snap_data['index_name']
        logger.debug('ss_idx = %s', self.var.ss_idx)

    @property
    def success(self) -> bool:
        """
        :getter: Get the success state
        :setter: Set the success state
        :type: bool
        """
        return self._success

    @success.setter
    def success(self, value: bool) -> None:
        self._success = value

    def run(self):
        """
        Do the actual run

        If the task is already finished, mark success and return. Otherwise
        begin the task, collect the index details, and run the redaction
        steps. A non-dry-run ends the task and sets success to ``True``; a
        dry run prefixes the task logs with ``DRY-RUN ||`` and sets success
        to ``False``.
        """
        if self.task.finished():
            self.success = True
            return
        # Log task start time
        self.task.begin()
        logger.info("Getting index info: %s", self.index)
        self.var.restore_settings = DotMap(self.task.job.config['restore_settings'])
        self.get_index_deets()

        steps = RedactionSteps(self.task, self.var)
        steps.run()

        if not self.task.job.dry_run:
            msg = f'Index {self.index} has completed all steps.'
            logger.info(msg)
            self.task.add_log(msg)
            self.task.end(True, errors=False)
            self.success = True
            return
        # Implied else (meaning it is a dry run)
        dry_run_logs = f'DRY-RUN || {self.task.logs}'
        self.success = False
        self.task.logs = dry_run_logs

118 self.success = False 

119 self.task.logs = _