Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/helpers/steps.py: 71% (468 statements; coverage.py v7.5.0, created at 2025-01-29 19:37 -0700)

"""Each function is a single step in PII redaction"""

from os import getenv
import typing as t
import time
import logging
from dotmap import DotMap  # type: ignore
from es_wait import IlmPhase, IlmStep
from es_pii_tool.defaults import (
    PAUSE_DEFAULT,
    PAUSE_ENVVAR,
    TIMEOUT_DEFAULT,
    TIMEOUT_ENVVAR,
)
from es_pii_tool.exceptions import (
    BadClientResult,
    FatalError,
    MissingArgument,
    MissingError,
    MissingIndex,
    ValueMismatch,
)
from es_pii_tool.trackables import Step
from es_pii_tool.helpers import elastic_api as api
from es_pii_tool.helpers.utils import (
    configure_ilm_policy,
    get_alias_actions,
    strip_ilm_name,
    es_waiter,
)

if t.TYPE_CHECKING:
    from es_pii_tool.trackables import Task

PAUSE_VALUE = float(getenv(PAUSE_ENVVAR, default=PAUSE_DEFAULT))
TIMEOUT_VALUE = float(getenv(TIMEOUT_ENVVAR, default=TIMEOUT_DEFAULT))
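
# PAUSE_VALUE and TIMEOUT_VALUE are overridable at runtime through the
# environment variables named by PAUSE_ENVVAR and TIMEOUT_ENVVAR (defined in
# es_pii_tool.defaults; the literal variable names are not shown here). Both
# are cast to float, so an illustrative override looks like:
#
#   os.environ[PAUSE_ENVVAR] = '2.5'     # hypothetical: pause 2.5s per check
#   os.environ[TIMEOUT_ENVVAR] = '300'   # hypothetical: give up after 300s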

logger = logging.getLogger(__name__)


def failed_step(task: 'Task', step: 'Step', exc):
    """Shared failure handler: log the cause, mark the step and task failed,
    and raise FatalError"""
    # MissingIndex and BadClientResult are the only exception types expected
    # from callers
    if isinstance(exc, MissingIndex):
        msg = (
            f'Step failed because index {exc.missing} was not found. The upstream '
            f'exception type was MissingIndex, with error message: '
            f'{exc.upstream.args[0]}'
        )
    elif isinstance(exc, BadClientResult):
        msg = (
            f'Step failed because of a bad or unexpected response or result from '
            f'the Elasticsearch cluster. The upstream exception type was '
            f'BadClientResult, with error message: {exc.upstream.args[0]}'
        )
    else:
        msg = f'Step failed for an unexpected reason: {exc}'
    logger.critical(msg)
    step.end(False, errors=True, logmsg=msg)
    task.end(False, errors=True, logmsg=f'Failed {step.stepname}')
    raise FatalError(msg, exc)
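
# Note: failed_step() always raises FatalError and therefore never returns.
# A minimal sketch of the calling pattern used throughout this module:
#
#   try:
#       api.delete_index(var.client, var.redaction_target)
#   except (MissingIndex, BadClientResult) as exc:
#       failed_step(task, step, exc)  # marks step/task failed, then raises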


def metastep(task: 'Task', stepname: str, func, *args, **kwargs) -> None:
    """The reusable step: run ``func`` with step tracking, result logging, and
    dry-run handling"""
    step = Step(task=task, stepname=stepname)
    if step.finished():
        logger.info('%s: already completed', step.stub)
        return
    step.begin()
    dry_run_safe = kwargs.pop('dry_run_safe', False)
    dry_run_msg = kwargs.pop('dry_run_msg', None)
    include_step = kwargs.pop('include_step', False)
    if include_step:
        kwargs['step'] = step
    if (dry_run_safe and task.job.dry_run) or not task.job.dry_run:
        response = None  # Ensure a value even if func raises
        try:
            response = func(*args, **kwargs)
        except (MissingIndex, BadClientResult, ValueMismatch) as exc:
            failed_step(task, step, exc)  # Raises FatalError; never returns
        if response:
            step.add_log(f'{response}')
    else:
        if dry_run_msg is None:
            dry_run_msg = 'No action logged'
        msg = f'Dry-Run: No changes, but expected behavior: {dry_run_msg}'
        step.add_log(msg)
        logger.debug(msg)
    step.end(completed=True, errors=False, logmsg=f'{stepname} completed')
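
# Usage sketch (illustrative): step functions below delegate to metastep(),
# which forwards *args/**kwargs to the wrapped callable after popping the
# three control keywords (dry_run_safe, dry_run_msg, include_step):
#
#   metastep(task, 'clear-cache', api.clear_cache, var.client,
#            var.redaction_target, dry_run_msg='Clear cache of index ...')
#
# ('clear-cache' is a made-up step name for the example.)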


def missing_data(stepname, kwargs) -> None:
    """Shared guard: raise MissingArgument if the ``data`` keyword is absent"""
    if 'data' not in kwargs:
        msg = f'"{stepname}" is missing keyword argument(s)'
        what = 'type: DotMap'
        names = ['data']
        raise MissingArgument(msg, what, names)
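
# Contract sketch: every public step function below calls missing_data()
# first, so a DotMap must arrive as the ``data`` keyword, e.g. (illustrative):
#
#   data = DotMap()
#   resolve_index(task, 'resolve-index', var, data=data)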


def _meta_resolve_index(var: DotMap, data: DotMap) -> str:
    """The metastep body for resolve_index"""
    result = api.resolve_index(var.client, var.index)
    logger.debug('resolve data: %s', result)
    response = ''
    try:
        data.data_stream = result['indices'][0]['data_stream']
    except KeyError:
        response = f'Index {var.index} is not part of a data_stream'
        logger.debug(response)
    return response
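
# For reference, a trimmed _resolve/index response of the shape parsed above
# (field layout per the Elasticsearch resolve-index API; values illustrative):
#
#   {'indices': [{'name': 'idx-000001',
#                 'attributes': ['open'],
#                 'data_stream': 'my-data-stream'}],
#    'aliases': [], 'data_streams': []}
#
# The KeyError branch fires when the index has no 'data_stream' key, i.e. it
# is not a data stream backing index.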


def resolve_index(task: 'Task', stepname: str, var: DotMap, **kwargs) -> None:
    """
    Resolve the index to see if it's part of a data stream
    """
    missing_data(stepname, kwargs)
    data = kwargs['data']
    metastep(task, stepname, _meta_resolve_index, var, data, dry_run_safe=True)


def _meta_pre_delete(var: DotMap) -> str:
    """The metastep body for pre_delete"""
    response = ''
    # The metastep will handle the "don't do this if dry_run" logic
    try:
        api.delete_index(var.client, var.redaction_target)
    except MissingIndex:
        # Not a problem. This is normal and expected.
        response = f'Pre-delete did not find index "{var.redaction_target}"'
        logger.debug(response)
    return response


def pre_delete(task: 'Task', stepname: str, var: DotMap, **kwargs) -> None:
    """
    Pre-delete the redacted index to ensure no collisions. Ignore if not present
    """
    missing_data(stepname, kwargs)
    drm = f'Delete index {var.redaction_target} (if it exists)'
    metastep(task, stepname, _meta_pre_delete, var, dry_run_msg=drm)


def _meta_restore_index(var: DotMap) -> str:
    """The metastep body for restore_index"""
    response = f'Restored {var.ss_idx} to {var.redaction_target}'
    try:
        api.restore_index(
            var.client,
            var.repository,
            var.ss_snap,
            var.ss_idx,
            var.redaction_target,
            index_settings=var.restore_settings.toDict(),
        )
    except BadClientResult as bad:
        response = f'Unable to restore {var.ss_idx} to {var.redaction_target}: {bad}'
        logger.error(response)
    return response


def restore_index(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Restore index from snapshot"""
    missing_data(stepname, kwargs)
    drm = f'Restore {var.ss_idx} to {var.redaction_target}'
    metastep(task, stepname, _meta_restore_index, var, dry_run_msg=drm)


def _meta_get_ilm_data(var: DotMap, data: DotMap) -> str:
    """The metastep body for get_index_lifecycle_data"""
    res = api.get_settings(var.client, var.index)
    response = ''
    data.index = DotMap()
    data.index.lifecycle = DotMap(
        {'name': None, 'rollover_alias': None, 'indexing_complete': True}
    )
    try:
        data.index.lifecycle = DotMap(res[var.index]['settings']['index']['lifecycle'])
    except KeyError as err:
        response = f'Index {var.index} missing one or more lifecycle keys: {err}'
    if data.index.lifecycle.name:
        response = f'Index lifecycle settings: {data.index.lifecycle}'
    else:
        response = f'Index {var.index} has no ILM lifecycle'
    logger.debug(response)
    return response


def get_index_lifecycle_data(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """
    Populate data.index with index settings results referenced at
    INDEXNAME.settings.index.lifecycle
    """
    missing_data(stepname, kwargs)
    data = kwargs['data']
    metastep(task, stepname, _meta_get_ilm_data, var, data, dry_run_safe=True)
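
# For reference, the settings response parsed above nests per-index lifecycle
# data roughly like this (trimmed; values illustrative):
#
#   {'my-index': {'settings': {'index': {'lifecycle': {
#       'name': 'my-policy', 'rollover_alias': 'my-alias'}}}}}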


def _meta_get_ilm_explain_data(var: DotMap, data: DotMap) -> str:
    """The metastep body for get_ilm_explain_data"""
    response = ''
    if data.index.lifecycle.name:
        data.ilm = DotMap()
        try:
            res = api.get_ilm(var.client, var.index)
            data.ilm.explain = DotMap(res['indices'][var.index])
            response = f'ILM explain settings: {data.ilm.explain}'
        except MissingIndex as exc:
            logger.error('Index %s not found in ILM explain data', var.index)
            raise exc
    else:
        response = f'Index {var.index} has no ILM explain data'
    logger.debug(response)
    return response


def get_ilm_explain_data(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """
    Populate data.ilm.explain with ILM explain data
    """
    missing_data(stepname, kwargs)
    data = kwargs['data']
    metastep(task, stepname, _meta_get_ilm_explain_data, var, data, dry_run_safe=True)


def _meta_get_ilm_lifecycle_data(var: DotMap, data: DotMap) -> str:
    """The metastep body for get_ilm_lifecycle_data"""
    response = ''
    if data.index.lifecycle.name:
        res = api.get_ilm_lifecycle(var.client, data.index.lifecycle.name)
        if not res:
            msg = f'No such ILM policy: {data.index.lifecycle.name}'
            raise BadClientResult(msg, Exception())
        data.ilm.lifecycle = DotMap(res[data.index.lifecycle.name])
        response = f'ILM lifecycle settings: {data.ilm.lifecycle}'
    else:
        response = f'Index {var.index} has no ILM lifecycle data'
    logger.debug(response)
    return response


def get_ilm_lifecycle_data(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """
    Populate data.ilm.lifecycle with ILM lifecycle data
    """
    missing_data(stepname, kwargs)
    data = kwargs['data']
    metastep(task, stepname, _meta_get_ilm_lifecycle_data, var, data, dry_run_safe=True)


def clone_ilm_policy(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """
    If this index has an ILM policy, we need to clone it so we can attach
    the new index to it.
    """
    missing_data(stepname, kwargs)
    data = kwargs['data']
    step = Step(task=task, stepname=stepname)
    if step.finished():
        logger.info('%s: already completed', step.stub)
        return
    step.begin()
    if data.index.lifecycle.name is None or not data.ilm.lifecycle.policy:
        _ = f'{stepname}: Index {var.index} has no ILM lifecycle or policy data'
        logger.debug(_)
        step.add_log(_)
        step.end(completed=True, errors=False, logmsg=f'{stepname} completed')
        return
    data.new = DotMap()

    # From here, we check for a matching named cloned policy

    configure_ilm_policy(task, data)

    # New ILM policy naming: pii-tool-POLICYNAME---v###
    stub = f'pii-tool-{strip_ilm_name(data.index.lifecycle.name)}'
    policy = data.new.ilmpolicy.toDict()  # For comparison
    resp = {'dummy': 'startval'}  # So the while loop can start with something
    policyver = 0  # Our version number starting point
    policymatch = False
    while resp:
        data.new.ilmname = f'{stub}---v{policyver + 1:03}'
        resp = api.get_ilm_lifecycle(var.client, data.new.ilmname)  # type: ignore
        if resp:  # We have data, so the name matches
            # Compare the new policy to the one just returned
            if policy == resp[data.new.ilmname]['policy']:  # type: ignore
                msg = f'New policy data matches: {data.new.ilmname}'
                logger.debug(msg)
                step.add_log(msg)
                policymatch = True
                break  # We can drop out of the loop here
        # Implied else: resp has no value, so the while loop will end
        policyver += 1
    msg = f'New ILM policy name (may already exist): {data.new.ilmname}'
    logger.debug(msg)
    step.add_log(msg)
    if not task.job.dry_run:  # Don't create if dry_run
        if not policymatch:
            # Create the cloned ILM policy
            try:
                gkw = {'name': data.new.ilmname, 'policy': policy}
                api.generic_get(var.client.ilm.put_lifecycle, **gkw)
            except (MissingError, BadClientResult) as exc:
                _ = f'Unable to put new ILM policy: {exc}'
                logger.error(_)
                step.add_log(_)
                failed_step(task, step, exc)
        # Implied else: We've arrived at the expected new ILM name and it
        # matches an existing policy in both name and content, so we don't
        # need to create a new one.
    else:
        _ = (
            f'Dry-Run: No changes, but expected behavior: '
            f'ILM policy {data.new.ilmname} created'
        )
        logger.debug(_)
        step.add_log(_)
    step.end(completed=True, errors=False, logmsg=f'{stepname} completed')
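
# Naming sketch: for an original policy named 'hot-warm-delete' (illustrative),
# the loop above probes 'pii-tool-hot-warm-delete---v001', then ---v002, and
# so on, stopping at the first name that is unused or whose stored policy body
# matches the freshly configured one.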


def un_ilm_the_restored_index(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Remove the lifecycle data from the settings of the restored index"""
    missing_data(stepname, kwargs)
    drm = f'Any existing ILM policy removed from {var.redaction_target}'
    metastep(
        task,
        stepname,
        api.remove_ilm_policy,
        var.client,
        var.redaction_target,
        dry_run_msg=drm,
    )


def redact_from_index(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Run an update-by-query on the newly restored index"""
    missing_data(stepname, kwargs)
    drm = (
        f'Redact index {var.redaction_target} replacing content of fields: '
        f'{task.job.config["fields"]} with message: {task.job.config["message"]}'
    )
    metastep(
        task,
        stepname,
        api.redact_from_index,
        var.client,
        var.redaction_target,
        task.job.config,
        dry_run_msg=drm,
    )


def _meta_forcemerge_index(task: 'Task', var: DotMap, **kwargs) -> str:
    """Do some task logging around the forcemerge api call"""
    step = kwargs.pop('step', None)
    if step is None:
        raise MissingArgument('_meta_forcemerge_index', 'keyword argument', 'step')
    index = var.redaction_target
    msg = f'Before forcemerge, {api.report_segment_count(var.client, index)}'
    logger.info(msg)
    step.add_log(msg)
    fmkwargs = {}
    if 'forcemerge' in task.job.config:
        fmkwargs = task.job.config['forcemerge']
    fmkwargs['index'] = index
    if 'only_expunge_deletes' in fmkwargs and fmkwargs['only_expunge_deletes']:
        msg = 'Forcemerge will only expunge deleted docs!'
        logger.info(msg)
        step.add_log(msg)
    else:
        mns = 1  # default value
        if 'max_num_segments' in fmkwargs and isinstance(
            fmkwargs['max_num_segments'], int
        ):
            mns = fmkwargs['max_num_segments']
        msg = f'Proceeding to forcemerge to {mns} segments per shard'
        logger.info(msg)
        step.add_log(msg)
    logger.debug('forcemerge kwargs = %s', fmkwargs)
    # Do the actual forcemerging
    api.forcemerge_index(var.client, **fmkwargs)
    msg = f'After forcemerge, {api.report_segment_count(var.client, index)}'
    logger.info(msg)
    step.add_log(msg)
    return msg


def forcemerge_index(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Force merge redacted index"""
    missing_data(stepname, kwargs)
    msg = ''
    fmkwargs = {}
    if 'forcemerge' in task.job.config:
        fmkwargs = task.job.config['forcemerge']
    if 'only_expunge_deletes' in fmkwargs and fmkwargs['only_expunge_deletes']:
        msg = 'only expunging deleted docs'
    else:
        mns = 1  # default value
        if 'max_num_segments' in fmkwargs and isinstance(
            fmkwargs['max_num_segments'], int
        ):
            mns = fmkwargs['max_num_segments']
        msg = f'to {mns} segments per shard'
    drm = f'Forcemerge index {var.redaction_target} {msg}'
    metastep(
        task,
        stepname,
        _meta_forcemerge_index,
        task,
        var,
        include_step=True,
        dry_run_msg=drm,
    )
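
# Config sketch: both functions above read an optional 'forcemerge' mapping
# from the job config and pass it through as keyword arguments, e.g.
# (illustrative):
#
#   {'forcemerge': {'max_num_segments': 1}}
#   {'forcemerge': {'only_expunge_deletes': True}}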


def clear_cache(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Clear cache of redacted index"""
    missing_data(stepname, kwargs)
    drm = f'Clear cache of index {var.redaction_target}'
    metastep(
        task,
        stepname,
        api.clear_cache,
        var.client,
        var.redaction_target,
        dry_run_msg=drm,
    )


def confirm_redaction(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Check that the update-by-query did its job"""
    missing_data(stepname, kwargs)
    drm = f'Confirm redaction of index {var.redaction_target}'
    metastep(
        task,
        stepname,
        api.check_index,
        var.client,
        var.redaction_target,
        task.job.config,
        dry_run_msg=drm,
    )


def snapshot_index(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Create a new snapshot for mounting our redacted index"""
    missing_data(stepname, kwargs)
    drm = f'Snapshot index {var.redaction_target} to {var.new_snap_name}'
    metastep(
        task,
        stepname,
        api.take_snapshot,
        var.client,
        var.repository,
        var.new_snap_name,
        var.redaction_target,
        dry_run_msg=drm,
    )


def mount_snapshot(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """
    Mount the index as a searchable snapshot to make the redacted index available
    """
    missing_data(stepname, kwargs)
    drm = (
        f'Mount index {var.redaction_target} in snapshot '
        f'{var.new_snap_name} as {var.mount_name}'
    )
    metastep(task, stepname, api.mount_index, var, dry_run_msg=drm)


def apply_ilm_policy(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """
    If the index was associated with an ILM policy, associate it with the
    new, cloned ILM policy.
    """
    missing_data(stepname, kwargs)
    data = kwargs['data']
    if data.new.ilmname:
        settings = {'index': {}}  # type: ignore
        # Add all of the original lifecycle settings
        settings['index']['lifecycle'] = data.index.lifecycle.toDict()
        # Replace the name with the new ILM policy name
        settings['index']['lifecycle']['name'] = data.new.ilmname
        drm = f'Apply new ILM policy {data.new.ilmname} to {var.mount_name}'
        metastep(
            task,
            stepname,
            api.put_settings,
            var.client,
            var.mount_name,
            settings,
            dry_run_msg=drm,
        )
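
# Payload sketch: given an original lifecycle of {'name': 'my-policy',
# 'rollover_alias': 'my-alias'} and a clone named 'pii-tool-my-policy---v001'
# (names illustrative), the put-settings body built above would be:
#
#   {'index': {'lifecycle': {'name': 'pii-tool-my-policy---v001',
#                            'rollover_alias': 'my-alias'}}}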


def confirm_ilm_phase(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """
    Confirm the mounted index is in the expected ILM phase.
    This is done by using move_to_step. If it's already in the step, no problem.
    If it's in step ``new``, this will advance the index to the expected step.
    """
    missing_data(stepname, kwargs)
    step = Step(task=task, stepname=stepname)
    if step.finished():
        logger.info('%s: already completed', step.stub)
        return
    step.begin()
    if task.job.dry_run:  # Don't actually move_to_step if dry_run
        msg = f'Dry-Run: {var.mount_name} moved to ILM phase {var.phase}'
        logger.debug(msg)
        step.add_log(msg)
        step.end(completed=True, errors=False, logmsg=f'{stepname} completed')
        return
    # Wait for phase to be "new" (or higher)
    waitkw = {'pause': PAUSE_VALUE, 'timeout': TIMEOUT_VALUE}
    try:
        # Update in es_wait 0.9.2: if you send phase='new', it will wait for
        # the phase to be 'new' or higher. This is where a user was getting
        # stuck: they were waiting for 'new', but the phase was already
        # 'frozen', so it was endlessly checking for 'new'.
        es_waiter(var.client, IlmPhase, name=var.mount_name, phase='new', **waitkw)
        # Wait for step to be "complete"
        es_waiter(var.client, IlmStep, name=var.mount_name, **waitkw)
    except BadClientResult as bad:
        _ = f'ILM step confirmation problem -- ERROR: {bad}'
        logger.error(_)
        step.add_log(_)
        failed_step(task, step, bad)

    def get_currstep():
        try:
            _ = api.generic_get(var.client.ilm.explain_lifecycle, index=var.mount_name)
        except MissingError as exc:
            _ = f'Unable to get ILM phase of {var.mount_name}'
            logger.error(_)
            step.add_log(_)
            failed_step(task, step, exc)
        try:
            expl = _['indices'][var.mount_name]
        except KeyError as err:
            msg = f'{var.mount_name} not found in ILM explain data: {err}'
            logger.error(msg)
            step.add_log(msg)
            failed_step(task, step, err)
        if not expl.get('managed'):
            msg = f'Index {var.mount_name} is not managed by ILM'
            step.add_log(msg)
            failed_step(
                task, step,
                ValueMismatch(msg, expl.get('managed'), '{"managed": True}'),
            )
        return {'phase': expl['phase'], 'action': expl['action'], 'name': expl['step']}

    nextstep = {'phase': var.phase, 'action': 'complete', 'name': 'complete'}
    # We will try to move the index to the expected phase up to 3 times
    # before failing the step.
    attempts = 0
    success = False
    while attempts < 3 and not success:
        # Since we are now testing for 'new' or higher, we may not need to
        # advance ILM phases. If the current step is already where we expect
        # to be, log confirmation and move on.
        logger.debug('Attempt number: %s', attempts)
        currstep = get_currstep()
        if currstep == nextstep:
            msg = (
                f'{stepname}: {var.mount_name} is confirmed to be in ILM phase '
                f'{var.phase}'
            )
            logger.debug(msg)
            step.add_log(msg)
            # Set both while loop criteria to values that will end the loop
            success = True
            attempts = 3
        else:
            # If we are not yet in the expected target phase, then proceed
            # with the ILM phase change.
            logger.debug('Current ILM Phase: %s', currstep)
            logger.debug('Target ILM Phase: %s', nextstep)
            logger.debug('PHASE: %s', var.phase)
            try:
                api.ilm_move(var.client, var.mount_name, currstep, nextstep)
                success = True
            except BadClientResult as bad:
                logger.debug('Attempt failed. Incrementing attempts.')
                attempts += 1
                if attempts == 3:
                    _ = 'Attempt limit reached. Failing step.'
                    logger.error(_)
                    step.add_log(_)
                    failed_step(task, step, bad)
                logger.debug('Waiting %s seconds before retrying...', PAUSE_VALUE)
                time.sleep(PAUSE_VALUE)
                logger.warning('ILM move failed: %s -- Retrying...', bad.message)
                continue
            try:
                es_waiter(
                    var.client, IlmPhase, name=var.mount_name, phase=var.phase,
                    **waitkw
                )
                es_waiter(var.client, IlmStep, name=var.mount_name, **waitkw)
            except BadClientResult as phase_err:
                msg = f'Unable to wait for ILM step to complete -- ERROR: {phase_err}'
                logger.error(msg)
                step.add_log(msg)
                failed_step(task, step, phase_err)
    # If we make it here, we have successfully moved the index to the
    # expected phase
    step.end(completed=True, errors=False, logmsg=f'{stepname} completed')
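
# Waiter sketch: both waits above poll with the module-level pause/timeout.
# A standalone equivalent of the phase wait (index name made up):
#
#   es_waiter(var.client, IlmPhase, name='partial-idx-000001', phase='frozen',
#             pause=PAUSE_VALUE, timeout=TIMEOUT_VALUE)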


def delete_redaction_target(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """
    Now that it's mounted (with a new name), we should delete the
    redaction_target index
    """
    missing_data(stepname, kwargs)
    drm = f'Delete redaction target index {var.redaction_target}'
    metastep(
        task,
        stepname,
        api.delete_index,
        var.client,
        var.redaction_target,
        dry_run_msg=drm,
    )


def fix_aliases(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Using the aliases collected from var.index, update mount_name and verify"""
    missing_data(stepname, kwargs)
    data = kwargs['data']
    step = Step(task=task, stepname=stepname)
    if step.finished():
        logger.info('%s: already completed', step.stub)
        return
    step.begin()
    if data.data_stream:
        msg = 'Cannot apply aliases to indices in data_stream'
        logger.debug(msg)
        step.add_log(msg)
        step.end(completed=True, errors=False, logmsg=f'{stepname} completed')
        return
    alias_names = var.aliases.toDict().keys()
    if not alias_names:
        msg = f'No aliases associated with index {var.index}'
        step.add_log(msg)
        logger.info(msg)
    elif not task.job.dry_run:
        msg = f'Transferring aliases to new index {var.mount_name}'
        logger.debug(msg)
        step.add_log(msg)
        var.client.indices.update_aliases(
            actions=get_alias_actions(var.index, var.mount_name, var.aliases.toDict())
        )
        verify = var.client.indices.get(index=var.mount_name)[var.mount_name][
            'aliases'
        ].keys()
        if alias_names != verify:
            msg = f'Alias names do not match! {alias_names} does not match: {verify}'
            logger.critical(msg)
            step.add_log(msg)
            failed_step(
                task, step, ValueMismatch(msg, 'alias names mismatch', alias_names)
            )
    else:
        msg = 'Dry-Run: alias transfer not executed'
        logger.debug(msg)
        step.add_log(msg)
    step.end(completed=True, errors=False, logmsg=f'{stepname} completed')


def un_ilm_the_original_index(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """
    Remove the lifecycle data from the settings of the original index

    This is chiefly done as a safety measure.
    """
    missing_data(stepname, kwargs)
    metastep(task, stepname, api.remove_ilm_policy, var.client, var.index)


def close_old_index(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Close old mounted snapshot"""
    missing_data(stepname, kwargs)
    metastep(task, stepname, api.close_index, var.client, var.index)


def delete_old_index(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Delete old mounted snapshot, if configured to do so"""
    missing_data(stepname, kwargs)
    step = Step(task=task, stepname=stepname)
    if step.finished():
        logger.info('%s: already completed', step.stub)
        return
    step.begin()
    if task.job.config['delete']:
        msg = f'Deleting original mounted index: {var.index}'
        task.add_log(msg)
        logger.info(msg)
        try:
            api.delete_index(var.client, var.index)
        except MissingIndex as miss:
            msg = f'Index {var.index} not found for deletion: {miss}'
            logger.error(msg)
            step.add_log(msg)
        except BadClientResult as bad:
            msg = f'Bad client result: {bad}'
            logger.error(msg)
            step.add_log(msg)
            failed_step(task, step, bad)
    else:
        msg = (
            f'delete set to False — not deleting original mounted index: '
            f'{var.index}'
        )
        task.add_log(msg)
        logger.warning(msg)
    step.end(completed=True, errors=False, logmsg=f'{stepname} completed')


def assign_aliases(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Put the starting index name on the new mounted index as an alias"""
    missing_data(stepname, kwargs)
    data = kwargs['data']
    step = Step(task=task, stepname=stepname)
    if step.finished():
        logger.info('%s: already completed', step.stub)
        return
    step.begin()
    if data.data_stream:
        msg = 'Cannot apply aliases to indices in data_stream'
        logger.debug(msg)
        step.add_log(msg)
        step.end(completed=True, errors=False, logmsg=f'{stepname} completed')
        return
    if not task.job.dry_run:
        msg = f'Assigning alias {var.index} to index {var.mount_name}'
        logger.debug(msg)
        step.add_log(msg)
        try:
            api.assign_alias(var.client, var.mount_name, var.index)
        except BadClientResult as bad:
            failed_step(task, step, bad)
    else:
        msg = f'Assigning alias {var.index} to index {var.mount_name}'
        _ = f'Dry-Run: No changes, but expected behavior: {msg}'
        logger.debug(_)
        step.add_log(_)
    step.end(completed=True, errors=False, logmsg=f'{stepname} completed')


def reassociate_index_with_ds(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """
    If the index was associated with a data_stream, reassociate it with the
    data_stream again.
    """
    missing_data(stepname, kwargs)
    data = kwargs['data']
    acts = [{'add_backing_index': {'index': var.mount_name}}]
    if data.data_stream:
        acts[0]['add_backing_index']['data_stream'] = data.data_stream
        logger.debug('%s: Modify data_stream actions: %s', stepname, acts)
        drm = f'Reassociate index {var.mount_name} with data_stream {data.data_stream}'
        metastep(
            task, stepname, api.modify_data_stream, var.client, acts, dry_run_msg=drm
        )
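
# Action sketch: for a redacted index mounted as 'mounted-idx-000001' that
# belonged to data_stream 'my-ds' (names illustrative), the actions list
# built above becomes:
#
#   [{'add_backing_index': {'index': 'mounted-idx-000001',
#                           'data_stream': 'my-ds'}}]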


def _meta_record_it(task: 'Task', snapname: str) -> str:
    """The metastep body for record_it"""
    task.job.cleanup.append(snapname)
    return f'Snapshot {snapname} added to cleanup list'


def record_it(task: 'Task', stepname, var: DotMap, **kwargs) -> None:
    """Record the now-deletable snapshot in the job's tracking index."""
    missing_data(stepname, kwargs)
    drm = f'Snapshot {var.ss_snap} added to cleanup list'
    metastep(task, stepname, _meta_record_it, task, var.ss_snap, dry_run_msg=drm)