Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/redacters/snapshot.py: 89%

75 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-10-01 16:39 -0600

1"""Redact data from a snapshot mounted index""" 

2 

3import typing as t 

4import logging 

5from datetime import datetime 

6from dotmap import DotMap # type: ignore 

7from es_pii_tool.task import Task 

8from es_pii_tool.helpers import elastic_api as api 

9from es_pii_tool.helpers.utils import ( 

10 get_inc_version, 

11 strip_index_name, 

12) 

13from es_pii_tool.redacters.steps import RedactionSteps 

14 

15if t.TYPE_CHECKING: 

16 from elasticsearch8 import Elasticsearch 

17 from es_pii_tool.job import Job 

18 

19logger = logging.getLogger(__name__) 

20 

21 

class RedactSnapshot:
    """Redact PII from indices mounted as searchable snapshots"""

    # Class-level default so that reading ``success`` before ``run()`` has
    # assigned it (or after ``run()`` raised part-way through) yields False
    # instead of raising AttributeError.
    _success: bool = False

    def __init__(self, index: str, job: 'Job', phase: str):
        """
        :param index: Name of the mounted index to redact
        :param job: The job this redaction belongs to; its ``client`` is used
            for API calls and it is handed to the :py:class:`Task`
        :param phase: The phase name used to pick the mount prefix and storage
            type (see :py:meth:`_get_mapped_vars`)
        """
        self.index = index
        self.phase = phase
        self.task = Task(job, index=index, id_suffix='REDACT-SNAPSHOT')
        self.var = DotMap()
        self._buildvar(job.client, index, phase)

    def _buildvar(self, client: 'Elasticsearch', index: str, phase: str) -> None:
        """Populate :py:attr:`var` with the values we need to start with"""
        self.var.client = client
        self.var.index = index
        self.var.phase = phase
        self._get_mapped_vars(phase)
        self.var.og_name = strip_index_name(index)  # Removes prefixes and suffixes
        now = datetime.now()
        self.var.redaction_target = (
            f'redacted-{now.strftime("%Y%m%d%H%M%S")}-{self.var.og_name}'
        )
        self.var.new_snap_name = f'{self.var.redaction_target}-snap'
        # Check if the old index has been redacted before and has a version number
        self.var.ver = get_inc_version(index)
        # The mount name contains a version at the end in case we need to redact
        # the index again. The version allows us to use a similar naming scheme
        # without redundancy
        self.var.mount_name = (
            f'{self.var.prefix}redacted-{self.var.og_name}---v{self.var.ver + 1:03}'
        )
        logger.debug('mount_name = %s', self.var.mount_name)

    def _get_mapped_vars(self, phase: str) -> None:
        """
        Set ``var.prefix`` and ``var.storage`` according to ``phase``

        ``cold`` maps to ``restored-`` / ``full_copy`` and ``frozen`` maps to
        ``partial-`` / ``shared_cache``. Any other phase leaves both values as
        empty strings.
        """
        self.var.prefix = ''
        self.var.storage = ''
        if phase == 'cold':
            self.var.prefix = 'restored-'
            self.var.storage = 'full_copy'
        elif phase == 'frozen':
            self.var.prefix = 'partial-'
            self.var.storage = 'shared_cache'

    def get_index_deets(self) -> None:
        """
        Return searchable snapshot values from deeply nested index settings

        Nothing is returned to the caller; the aliases, repository name,
        snapshot name and snapshot index name are stored on :py:attr:`var`.
        A missing ``settings.index.store.snapshot`` entry in the response will
        surface as a lookup error from the subscript chain.
        """
        response = api.get_index(self.var.client, self.var.index)
        logger.debug('Found indices: %s', list(response.keys()))
        index_data = response[self.var.index]
        self.var.aliases = DotMap(index_data['aliases'])
        snap_data = index_data['settings']['index']['store']['snapshot']
        self.var.repository = snap_data['repository_name']
        self.var.ss_snap = snap_data['snapshot_name']
        self.var.ss_idx = snap_data['index_name']
        logger.debug('ss_idx = %s', self.var.ss_idx)

    @property
    def success(self) -> bool:
        """
        :getter: Get the success state
        :setter: Set the success state
        :type: bool
        """
        return self._success

    @success.setter
    def success(self, value: bool) -> None:
        self._success = value

    def run(self) -> None:
        """
        Do the actual run

        If the task reports itself as finished, :py:attr:`success` is set to
        ``True`` and nothing else happens. Otherwise the task is begun, the
        index details are collected and the redaction steps are run. On a real
        run the task is ended and :py:attr:`success` becomes ``True``; on a
        dry run the task logs are prefixed with ``DRY-RUN ||`` and
        :py:attr:`success` is set to ``False``.
        """
        if self.task.finished():
            self.success = True
            return
        # Log task start time
        self.task.begin()
        logger.info("Getting index info: %s", self.index)
        self.var.restore_settings = DotMap(self.task.job.config['restore_settings'])
        self.get_index_deets()

        steps = RedactionSteps(self.task, self.var)
        steps.run()

        if not self.task.job.dry_run:
            msg = f'Index {self.index} has completed all steps.'
            logger.info(msg)
            self.task.add_log(msg)
            self.task.end(True, errors=False)
            self.success = True
            return
        # Implied else (meaning it is a dry run)
        dry_run_logs = f'DRY-RUN || {self.task.logs}'
        self.success = False
        self.task.logs = dry_run_logs

114 self.task.logs = _