Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/helpers/elastic_api.py: 67%

383 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2025-03-17 23:49 -0600

1"""Functions making Elasticsearch API calls""" 

2 

3import typing as t 

4import time 

5import logging 

6from elasticsearch8.exceptions import ( 

7 ApiError, 

8 NotFoundError, 

9 TransportError, 

10 BadRequestError, 

11) 

12from es_wait import Health, Restore, Snapshot, Task 

13from es_pii_tool.exceptions import ( 

14 BadClientResult, 

15 FatalError, 

16 MissingDocument, 

17 MissingError, 

18 MissingIndex, 

19 ValueMismatch, 

20) 

21from es_pii_tool.helpers.utils import build_script, check_fields, es_waiter, timing 

22 

23if t.TYPE_CHECKING: 

24 from dotmap import DotMap # type: ignore 

25 from elasticsearch8 import Elasticsearch 

26 from elastic_transport import HeadApiResponse 

27 

28 

29logger = logging.getLogger(__name__) 

30 

31# pylint: disable=R0913,R0917,W0707 

32 

33 

def assign_alias(client: 'Elasticsearch', index_name: str, alias_name: str) -> None:
    """Add ``index_name`` to alias ``alias_name``.

    :param client: A client connection object
    :param index_name: The index name
    :param alias_name: The alias to add the index to

    :raises BadClientResult: If the put_alias API call fails
    """
    try:
        result = client.indices.put_alias(index=index_name, name=alias_name)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Attempt to assign index "{index_name}" to alias "{alias_name}" failed'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.info(
        "Index '%s' was successfully added to alias '%s'", index_name, alias_name
    )
    logger.debug(result)

46 

47 

def check_index(client: 'Elasticsearch', index_name: str, job_config: t.Dict) -> None:
    """Spot-check that the redacted index no longer matches the redaction query.

    :param client: A client connection object
    :param index_name: The index to check
    :param job_config: Job config containing the ``query`` and expected fields

    :raises ValueMismatch: If any field was not redacted
    """
    logger.info('Making a quick check on redacted index docs...')
    result = do_search(client, index_name, job_config['query'])
    hit_total = result['hits']['total']['value']
    if hit_total == 0:
        # Zero hits is the expected outcome after a successful redaction
        logger.warning(
            'Query returned no results, assuming it only returns docs '
            'to be redacted and not already redacted...'
        )
        return
    if not check_fields(result, job_config):
        msg = 'One or more fields were not redacted. Check the logs'
        logger.error(msg)
        raise ValueMismatch(msg, 'count of fields matching query is not 0', '0')

63 

64 

def clear_cache(client: 'Elasticsearch', index_name: str) -> None:
    """Clear cached data for ``index_name``.

    Errors are logged and swallowed: cache clearing is best-effort and a
    failure must not abort the surrounding workflow.

    :param client: A client connection object
    :param index_name: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str

    :returns: No return value
    :rtype: None
    """
    logger.info('Clearing cache data for %s...', index_name)
    try:
        result = dict(
            client.indices.clear_cache(
                index=index_name, expand_wildcards=['open', 'hidden']
            )
        )
        logger.debug(result)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        # Deliberately non-fatal
        logger.error('clear_cache API call resulted in an error: %s', err)

88 

89 

def close_index(client: 'Elasticsearch', name: str) -> None:
    """Close index ``name``.

    :param client: A client connection object
    :param name: The index name to close

    :raises MissingIndex: If the close API call fails for any reason
    """
    try:
        result = client.indices.close(index=name, expand_wildcards=['open', 'hidden'])
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", name, err)
        raise MissingIndex(f'Index "{name}" not found', err, name)
    logger.debug(result)

103 

104 

def create_index(
    client: 'Elasticsearch',
    name: str,
    mappings: t.Union[t.Dict, None] = None,
    settings: t.Union[t.Dict, None] = None,
) -> None:
    """Create index ``name`` with the given mappings and settings.

    A no-op (with an info log) when the index already exists.

    :param client: A client connection object
    :param name: The index name
    :param mappings: The index mappings
    :param settings: The index settings

    :raises BadClientResult: If index creation fails
    """
    if index_exists(client, name):
        logger.info('Index %s already exists', name)
        return
    try:
        result = client.indices.create(
            index=name, settings=settings, mappings=mappings
        )
    except BadRequestError as err:
        logger.error("Index: '%s' already exists. Error: %s", name, err)
        raise BadClientResult(f'Index "{name}" already exists', err)
    except (ApiError, TransportError) as err:
        logger.error("Unknown error trying to create index: '%s'. Error: %s", name, err)
        raise BadClientResult(f'Unknown error trying to create index: {name}', err)
    logger.debug(result)

135 

136 

def delete_index(client: 'Elasticsearch', name: str) -> None:
    """Delete an index

    :param client: A client connection object
    :param name: The index name to delete

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type name: str

    :raises MissingIndex: If the delete API call fails for any reason
    """
    try:
        response = client.indices.delete(
            index=name, expand_wildcards=['open', 'hidden']
        )
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        # Log before raising (previously commented out), consistent with the
        # error handling in close_index/get_index/get_settings
        logger.error("Index: '%s' not found. Error: %s", name, err)
        raise MissingIndex(f'Index "{name}" not found', err, name)

153 

154 

def do_search(
    client: 'Elasticsearch', index_pattern: str, query: t.Dict, size: int = 10
) -> t.Dict:
    """Run ``query`` against ``index_pattern`` and return the raw search result.

    :param client: A client connection object
    :param index_pattern: A single index name, a csv list of indices, or other pattern
    :param query: An Elasticsearch DSL search query
    :param size: Maximum number of results to return

    :raises BadClientResult: If the search API call fails

    :returns: The full search response as a dict
    """
    search_args = {
        'index': index_pattern,
        'query': query,
        'size': size,
        'expand_wildcards': ['open', 'hidden'],
    }
    logger.debug('Search kwargs = %s', search_args)
    try:
        result = dict(client.search(**search_args))  # type: ignore
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Attempt to collect search results yielded an exception: {err}'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.debug(result)
    return result

185 

186 

def forcemerge_index(
    client: 'Elasticsearch',
    index: t.Union[str, None] = None,
    max_num_segments: int = 1,
    only_expunge_deletes: bool = False,
) -> None:
    """
    Force Merge an index, then block until the resulting task completes.

    :param client: A client connection object
    :param index: A single index name
    :param max_num_segments: The maximum number of segments per shard after a
        force merge
    :param only_expunge_deletes: Only expunge deleted docs during force merging.
        If True, ignores max_num_segments.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type max_num_segments: int
    :type only_expunge_deletes: bool

    :raises MissingIndex: If the forcemerge API call fails
    :raises FatalError: If waiting for the forcemerge task to finish fails
    """
    # wait_for_completion=False so the call returns a task id we can poll
    kwargs = {'index': index, 'wait_for_completion': False}
    # The two tuning options are mutually exclusive; send only one of them
    if only_expunge_deletes:
        kwargs.update({'only_expunge_deletes': only_expunge_deletes})
    else:
        kwargs.update({'max_num_segments': max_num_segments})  # type: ignore
    try:
        response = dict(client.indices.forcemerge(**kwargs))  # type: ignore
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)  # type: ignore
    logger.info('Waiting for forcemerge to complete...')
    # pause/timeout defaults for 'task' waits (environment-overridable)
    pause, timeout = timing('task')
    logger.debug(f'ENV pause = {pause}, timeout = {timeout}')
    try:
        # Poll the tasks API until the forcemerge task reaches completion
        es_waiter(
            client,
            Task,
            action='forcemerge',
            task_id=response['task'],
            pause=pause,
            timeout=timeout,
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to forcemerge', exc)
    logger.info('Forcemerge completed.')

236 

237 

def generic_get(func: t.Callable, **kwargs) -> t.Dict:
    """Call ``func(**kwargs)`` and return the response as a dict.

    :param func: The client API method to call
    :param kwargs: Keyword arguments passed straight through to ``func``

    :raises MissingError: If the call raises NotFoundError
    :raises BadClientResult: If the call raises any other client error

    :returns: The API response as a dict
    """
    try:
        result = dict(func(**kwargs))
    except NotFoundError as nferr:
        raise MissingError('Generic Get MissingError', nferr, nferr.info)
    except (ApiError, TransportError, BadRequestError) as err:
        raise BadClientResult('Generic Get BadClientResult Failure', err)
    logger.debug(result)
    return result

248 

249 

def get_hits(client: 'Elasticsearch', index: str, query: t.Dict) -> int:
    """Return the number of hits matching the query

    :param client: A client connection object
    :param index: The index or pattern to search
    :param query: The query to execute

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type query: dict

    :returns: The number of hits matching the query
    """
    return do_search(client, index, query)['hits']['total']['value']

265 

266 

def get_ilm(client: 'Elasticsearch', index: str) -> t.Dict:
    """Return the ILM lifecycle explanation for ``index``.

    :param client: A client connection object
    :param index: The index to check

    :raises MissingIndex: If the explain_lifecycle call raises NotFoundError

    :returns: The ILM explain object for the named index
    """
    try:
        explanation = dict(client.ilm.explain_lifecycle(index=index))
    except NotFoundError as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(explanation)
    return explanation

285 

286 

def get_ilm_lifecycle(client: 'Elasticsearch', policyname: str) -> t.Dict:
    """Return the ILM policy definition for ``policyname``.

    :param client: A client connection object
    :param policyname: The ILM policy name to look up

    :returns: The policy definition, or an empty dict if the policy is missing
    """
    try:
        return dict(client.ilm.get_lifecycle(name=policyname))
    except NotFoundError:
        # A missing policy is an expected condition, not an error
        logger.debug("ILM policy '%s' not found.", policyname)
        return {}

304 

305 

def get_index(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the info about an index

    :param client: A client connection object
    :param index: The index, csv indices, or index pattern to get

    :raises MissingIndex: If the get API call fails for any reason

    :returns: The index information object for the named index
    """
    try:
        info = dict(
            client.indices.get(index=index, expand_wildcards=['open', 'hidden'])
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug('Found indices: %s', list(info.keys()))
    return info

326 

327 

def get_phase(client: 'Elasticsearch', index: str) -> t.Union[str, None]:
    """Get the index's ILM phase

    Falls back to inspecting the index settings when the index is not ILM
    affiliated (e.g. a mounted searchable snapshot in cold/frozen).

    :param client: A client connection object
    :param index: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The ILM phase of ``index``, or None if undeterminable
    """
    ilm = get_ilm(client, index)
    try:
        return ilm['indices'][index]['phase']
    except KeyError:
        # Perhaps in cold/frozen but not ILM affiliated
        settings = get_settings(client, index)[index]['settings']['index']
        # Use .get() so a 'store' block without a 'type' key cannot raise an
        # uncaught KeyError here (it would escape the handler above)
        store = settings.get('store', {})
        if store.get('type') == 'snapshot':
            # It's a mounted searchable snapshot; infer the phase from the
            # _tier_preference allocation setting
            return get_phase_from_tier_pref(settings)
        return None

352 

353 

def get_phase_from_tier_pref(
    idx_settings: t.Dict,
) -> t.Union[t.Literal['frozen', 'cold'], None]:
    """
    Infer the ILM phase from the index's ``_tier_preference`` setting.

    :param idx_settings: The results from a
        get_settings(index=idx)[idx]['settings']['index'] call

    :returns: ``'frozen'`` for an exact ``data_frozen`` preference, ``'cold'``
        when ``data_cold`` appears in the preference list, otherwise None
    """
    try:
        prefs = idx_settings['routing']['allocation']['include']['_tier_preference']
    except KeyError:
        # No tier preference recorded at all
        prefs = ''
    if prefs == 'data_frozen':
        return 'frozen'
    return 'cold' if 'data_cold' in prefs.split(',') else None

374 

375 

def ilm_move(
    client: 'Elasticsearch', name: str, current_step: t.Dict, next_step: t.Dict
) -> None:
    """Advance index ``name`` from ``current_step`` to ``next_step`` in ILM.

    :param client: A client connection object
    :param name: The index name
    :param current_step: The step the index is currently in
    :param next_step: The step to move the index to

    :raises BadClientResult: If the move_to_step call fails for any reason
    """
    try:
        client.ilm.move_to_step(
            index=name, current_step=current_step, next_step=next_step
        )
    # Broad catch on purpose: any failure here becomes a BadClientResult
    except Exception as err:
        msg = f'Unable to move index {name} to ILM next step: {next_step}. Error: {err}'
        logger.critical(msg)
        raise BadClientResult(msg, err)

391 

392 

def modify_data_stream(
    client: 'Elasticsearch', actions: t.Sequence[t.Mapping[str, t.Any]]
) -> None:
    """Apply ``actions`` to a data_stream via the modify_data_stream API.

    :param client: A client connection object
    :param actions: The actions to take

    :raises MissingIndex: If the data_stream or backing index is missing
    """
    try:
        client.indices.modify_data_stream(actions=actions)
    except BadRequestError as exc:
        logger.error(
            "Unable to modify data_stream using actions='%s'. ERROR: %s", actions, exc
        )
        raise MissingIndex(
            'Missing either data_stream or index', exc, f'actions: {actions}'
        )

413 

414 

def report_segment_count(client: 'Elasticsearch', index: str) -> str:
    """
    Report the count of segments from index

    Only primary shards are counted; replicas are skipped.

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :raises BadClientResult: If the _cat/shards API call fails

    :returns: Formatted message describing shard count and segment count for index
    """
    shardcount = 0
    segmentcount = 0
    try:
        output = client.cat.shards(
            index=index, format='json', h=['index', 'shard', 'prirep', 'sc']
        )
    except Exception as exc:
        logger.error('Exception: %s', exc)
        raise BadClientResult('Unable to get cat shards output', exc)
    for shard in output:
        if shard['prirep'] == 'r':  # type: ignore
            # Skip replica shards
            continue
        if index != shard['index']:  # type: ignore
            logger.warning(
                'Index name %s does not match what was returned by the _cat API: %s',
                index,
                shard['index'],  # type: ignore
            )
        shardcount += 1
        segmentcount += int(shard['sc'])  # type: ignore
        logger.debug(
            'Index %s, shard %s has %s segments',
            index,
            shard["shard"],  # type: ignore
            shard["sc"],  # type: ignore
        )
    # Guard against ZeroDivisionError when no primary shards were reported
    # (e.g. an empty _cat response)
    average = segmentcount / shardcount if shardcount else 0.0
    return (
        f'index {index} has {shardcount} shards and a total of {segmentcount} '
        f'segments, averaging {average} segments per shard'
    )

459 

460 

def get_settings(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the settings for an index

    :param client: A client connection object
    :param index: The index to check

    :raises MissingIndex: If the get_settings API call fails for any reason

    :returns: The settings object for the named index
    """
    logger.debug('Getting settings for index: %s', index)
    try:
        settings = dict(
            client.indices.get_settings(
                index=index, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(settings)
    logger.debug('Index settings collected.')
    return settings

485 

486 

def put_settings(client: 'Elasticsearch', index: str, settings: dict) -> None:
    """Apply ``settings`` to ``index`` via the put_settings API

    :param client: A client connection object
    :param index: The index to modify
    :param settings: The index settings to apply

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type settings: dict

    :raises MissingIndex: If the index does not exist
    :raises BadClientResult: If Elasticsearch rejects the settings
    """
    try:
        client.indices.put_settings(index=index, settings=settings)
    except NotFoundError as exc:
        logger.error("Index '%s' not found: %s", index, exc)
        raise MissingIndex('Index not found', exc, index)
    except BadRequestError as exc:
        logger.error("Bad settings: %s. ERROR: %s", settings, exc)
        raise BadClientResult(f'Invalid settings: {settings}', exc)

504 

505 

def get_progress_doc(
    client: 'Elasticsearch',
    index_name: str,
    job_id: str,
    task_id: str,
    stepname: str = '',
) -> t.Dict:
    """Get a task tracking doc

    Builds a parent/child query for the single progress document matching the
    task (or step, when ``stepname`` is given) and returns the one hit.

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job name string for the present redaction run
    :param task_id: The task_id string of the task we are searching for
    :param stepname: [Optional] The step name string of the step we are searching for

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str
    :type task_id: str
    :type stepname: str

    :raises MissingIndex: If the tracking index itself is missing
    :raises FatalError: If more than one matching document exists
    :raises MissingDocument: If no matching document exists

    :returns: The progress tracking document from the progress/status tracking index
        for the task or step
    """
    # Base value for stub (task) -- used only for log/error messages
    stub = f'Task: {task_id} of Job: {job_id}'
    # The proto query: children of the job document, filters filled in below
    query = {
        "bool": {
            "must": {"parent_id": {"type": "task", "id": job_id}},
            "filter": [],
        }
    }
    # The base value of the bool filter (task)
    filters = [
        {"term": {"task": task_id}},
        {"term": {"job": job_id}},
    ]
    if not stepname:
        logger.info('Tracking progress for %s', stub)
        # For Tasks progress docs, we must not match docs with a step field
        query['bool']['must_not'] = {"exists": {"field": "step"}}
    else:
        # Update stub to be for a step
        stub = f'Step: {stepname} of Task: {task_id} of Job: {job_id}'
        logger.info('Tracking progress for %s', stub)
        # Update filters to include step
        filters.append({"term": {"step": stepname}})
    # Add the filters to the query
    query['bool']['filter'] = filters  # type: ignore
    try:
        result = do_search(client, index_pattern=index_name, query=query)
    except NotFoundError as err:
        msg = f'Tracking index {index_name} is missing'
        logger.critical(msg)
        raise MissingIndex(msg, err, index_name)
    # First get the edge case of multiple hits out of the way
    if result['hits']['total']['value'] > 1:
        msg = f'Tracking document for {stub} is not unique. This should never happen.'
        logger.critical(msg)
        raise FatalError(msg, ValueError())
    # After the > 1 test, if we don't have exactly 1 hit, we have zero hits
    if result['hits']['total']['value'] != 1:
        msg = f'Tracking document for {stub} does not exist'
        missing = f'A document with step: {stepname}, task: {task_id}, job: {job_id}'
        logger.debug(msg)
        raise MissingDocument(msg, Exception(), missing)
    # There can be only one...
    return result['hits']['hits'][0]

575 

576 

def get_tracking_doc(client: 'Elasticsearch', index_name: str, job_id: str) -> t.Dict:
    """Get the progress/status tracking doc for the provided job_id

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job_id string for the present redaction run

    :raises MissingIndex: If the tracking index itself does not exist
    :raises MissingDocument: If no tracking document exists for ``job_id``

    :returns: The ``_source`` of the tracking document
    """
    if not index_exists(client, index_name):
        msg = f'Tracking index {index_name} is missing'
        logger.critical(msg)
        raise MissingIndex(msg, Exception(), index_name)
    try:
        doc = dict(client.get(index=index_name, id=job_id))
    except NotFoundError as exc:
        msg = f'Tracking document for job_id {job_id} does not exist'
        logger.debug(msg)
        raise MissingDocument(msg, exc, job_id)
    return doc['_source']

602 

603 

def index_exists(client: 'Elasticsearch', index_name: str) -> 'HeadApiResponse':
    """Report whether index ``index_name`` exists.

    Open and hidden indices are both considered.

    :param client: A client connection object
    :param index_name: The index name

    :returns: ``HeadApiResponse(True)`` if ``index_name`` exists, otherwise
        ``HeadApiResponse(False)``
    """
    return client.indices.exists(
        index=index_name, expand_wildcards=['open', 'hidden']
    )

617 

618 

def job_exists(
    client: 'Elasticsearch', index_name: str, job_id: str
) -> 'HeadApiResponse':
    """Report whether a document exists for the present job_id.

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job_id string for the present redaction run

    :returns: ``HeadApiResponse(True)`` if a document with id ``job_id``
        exists in ``index_name``, otherwise ``HeadApiResponse(False)``
    """
    return client.exists(index=index_name, id=job_id)

636 

637 

def mount_index(var: 'DotMap') -> None:
    """Mount index as a searchable snapshot

    Deletes any pre-existing index with the target mount name first, then
    mounts and waits for the mounted index to reach "green" health.

    :param var: A collection of variables from
        :py:attr:`~.es_pii_tool.redacters.snapshot.RedactSnapshot.var`

    :type var: DotMap

    :raises BadClientResult: If the mount API call fails
    :raises FatalError: If the mounted index never reaches "green" health
    """
    response = {}
    msg = (
        f'Mounting {var.redaction_target} renamed as {var.mount_name} '
        f'from repository: {var.repository}, snapshot: {var.new_snap_name} '
        f'with storage={var.storage}'
    )
    logger.debug(msg)
    # Loop (rather than a single check) so we re-verify the delete took effect
    # before attempting the mount
    while index_exists(var.client, var.mount_name):
        logger.warning('Index %s exists. Deleting before remounting', var.mount_name)
        delete_index(var.client, var.mount_name)
        time.sleep(3.0)
    try:
        response = dict(
            var.client.searchable_snapshots.mount(
                repository=var.repository,
                snapshot=var.new_snap_name,
                index=var.redaction_target,
                renamed_index=var.mount_name,
                storage=var.storage,
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Attempt to mount index '%s' failed: %s", var.mount_name, err)
        logger.debug(response)
        raise BadClientResult('Error when mount index attempted', err)
    logger.info('Ensuring searchable snapshot mount is in "green" health state...')
    # pause/timeout defaults for 'health' waits (environment-overridable)
    pause, timeout = timing('health')
    logger.debug(f'ENV pause = {pause}, timeout = {timeout}')
    try:
        # Block until the mounted index reports "green" cluster health
        es_waiter(
            var.client,
            Health,
            check_type='status',
            indices=var.mount_name,
            pause=pause,
            timeout=timeout,
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to mount index from snapshot', exc)
    logger.info("Index '%s' mounted from snapshot succesfully", var.mount_name)

687 

688 

def resolve_index(client: 'Elasticsearch', index: str) -> t.Dict:
    """Resolve an index

    :param client: A client connection object
    :param index: The index name

    :raises MissingIndex: If the resolve API call fails for any reason

    :returns: The return value from
        :py:meth:`~.elasticsearch.Elasticsearch.IndicesClient.resolve_index`
    :rtype: dict
    """
    logger.debug('Resolving index: %s', index)
    try:
        resolved = dict(
            client.indices.resolve_index(
                name=index, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(resolved)
    logger.debug('Index resolved.')
    return resolved

715 

716 

def restore_index(
    client: 'Elasticsearch',
    repo_name: str,
    snap_name: str,
    index_name: str,
    replacement: str,
    re_pattern: str = '(.+)',
    index_settings: t.Union[str, None] = None,
) -> None:
    """Restore an index, wait for completion, then verify its health.

    :param client: A client connection object
    :param repo_name: The repository name
    :param snap_name: The snapshot name
    :param index_name: The index name as it appears in the snapshot metadata
    :param replacement: The name or substitution string to use as the restored index
        name
    :param re_pattern: The optional rename pattern for use with ``replacement``
    :param index_settings: Any settings to apply to the restored index, such as
        _tier_preference

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repo_name: str
    :type snap_name: str
    :type index_name: str
    :type replacement: str
    :type re_pattern: str
    :type index_settings: dict

    :raises BadClientResult: If the restore call or the restore wait fails
    :raises ValueMismatch: If the restored index ends up in "red" health
    """
    # Human-readable echo of the restore arguments, for the debug log only
    msg = (
        f"repository={repo_name}, snapshot={snap_name}, indices={index_name},"
        f"include_aliases=False,"
        f"ignore_index_settings=["
        f" 'index.lifecycle.name', 'index.lifecycle.rollover_alias',"
        f" 'index.routing.allocation.include._tier_preference'],"
        f"index_settings={index_settings},"
        f"rename_pattern={re_pattern},"
        f"rename_replacement={replacement},"
        f"wait_for_completion=False"
    )
    logger.debug('RESTORE settings: %s', msg)
    try:
        # ILM and tier-preference settings are stripped so the restored copy
        # is not immediately managed/relocated
        response = client.snapshot.restore(
            repository=repo_name,
            snapshot=snap_name,
            indices=index_name,
            include_aliases=False,
            ignore_index_settings=[
                'index.lifecycle.name',
                'index.lifecycle.rollover_alias',
                'index.routing.allocation.include._tier_preference',
            ],
            index_settings=index_settings,  # type: ignore
            rename_pattern=re_pattern,
            rename_replacement=replacement,
            wait_for_completion=False,
        )
        logger.debug('Response = %s', response)
        logger.info('Checking if restoration completed...')
        # pause/timeout defaults for 'restore' waits (environment-overridable)
        pause, timeout = timing('restore')
        logger.debug(f'ENV pause = {pause}, timeout = {timeout}')
        try:
            # Block until the renamed index has finished restoring
            es_waiter(
                client, Restore, index_list=[replacement], pause=pause, timeout=timeout
            )
        except BadClientResult as bad:
            logger.error('Exception: %s', bad)
            raise BadClientResult('Failed to restore index from snapshot', bad)
        msg = f'Restoration of index {index_name} as {replacement} complete'
        logger.info(msg)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = (
            f'Restoration of index {index_name} as {replacement} yielded an error: '
            f'{err}'
        )
        logger.error(msg)
        raise BadClientResult(msg, err)
    # verify index is green (yellow is tolerated; only "red" is fatal)
    logger.info('Ensuring restored index is in "green" health state...')
    res = dict(client.cluster.health(index=replacement, filter_path='status'))
    logger.debug('res = %s', res)
    if res['status'] == 'red':
        msg = f'Restored index {replacement} is not in a healthy state'
        logger.error(msg)
        raise ValueMismatch(msg, 'index health is "red"', 'green or yellow')

802 

803 

def redact_from_index(client: 'Elasticsearch', index_name: str, config: t.Dict) -> None:
    """Redact data from an index using a painless script.

    Collect the task_id and wait for the update_by_query task to complete
    before returning

    :param client: A client connection object
    :param index_name: The index to act on
    :param config: The config block being iterated. Contains ``query``, ``message``,
        and ``fields``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type config: dict

    :raises FatalError: If the update_by_query call or its task wait fails
    """
    logger.debug('Begin redaction...')
    logger.info('Before update by query, %s', report_segment_count(client, index_name))
    logger.debug('Updating and redacting data...')
    # Painless script that overwrites config['fields'] with config['message']
    script = build_script(config['message'], config['fields'])
    response = {}
    try:
        # wait_for_completion=False: the call returns a task id to poll below
        response = dict(
            client.update_by_query(
                index=index_name,
                script=script,
                query=config['query'],
                wait_for_completion=False,
                expand_wildcards=['open', 'hidden'],
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.critical('update_by_query yielded an error: %s', err)
        raise FatalError('update_by_query API call failed', err)
    logger.debug('Checking update by query status...')
    logger.debug('response = %s', response)
    # pause/timeout defaults for 'task' waits (environment-overridable)
    pause, timeout = timing('task')
    logger.debug(f'ENV pause = {pause}, timeout = {timeout}')
    try:
        # Poll the tasks API until the update_by_query task finishes
        es_waiter(
            client,
            Task,
            action='update_by_query',
            task_id=response['task'],
            pause=pause,
            timeout=timeout,
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to complete update by query', exc)
    logger.info('After update by query, %s', report_segment_count(client, index_name))
    logger.debug('Update by query completed.')

854 

855 

def remove_ilm_policy(client: 'Elasticsearch', index: str) -> t.Dict:
    """Remove any ILM policy associated with index

    :param client: A client connection object
    :param index: The index

    :raises MissingIndex: If the remove_policy API call fails for any reason

    :returns: The response, e.g. ``{'has_failures': False, 'failed_indexes': []}``
    """
    try:
        result = dict(client.ilm.remove_policy(index=index))
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(result)
    return result

874 

875 

def take_snapshot(
    client: 'Elasticsearch', repo_name: str, snap_name: str, index_name: str
) -> None:
    """
    Take snapshot of index, then block until the snapshot completes.

    :param client: A client connection object
    :param repo_name: The repository name
    :param snap_name: The snapshot name
    :param index_name: The name of the index to snapshot

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repo_name: str
    :type snap_name: str
    :type index_name: str

    :raises BadClientResult: If the snapshot create call fails
    :raises FatalError: If waiting for snapshot completion fails
    """
    logger.info('Creating new snapshot...')
    response = {}
    try:
        # wait_for_completion=False: completion is polled for below instead
        response = dict(
            client.snapshot.create(
                repository=repo_name,
                snapshot=snap_name,
                indices=index_name,
                wait_for_completion=False,
            )
        )
        logger.debug('Snapshot response: %s', response)
    except (ApiError, NotFoundError, TransportError, BadRequestError, KeyError) as err:
        msg = f'Creation of snapshot "{snap_name}" resulted in an error: {err}'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.info('Checking on status of snapshot...')
    # pause/timeout defaults for 'snapshot' waits (environment-overridable)
    pause, timeout = timing('snapshot')
    logger.debug(f'ENV pause = {pause}, timeout = {timeout}')
    try:
        # Block until the snapshot reaches a terminal state
        es_waiter(
            client,
            Snapshot,
            snapshot=snap_name,
            repository=repo_name,
            pause=pause,
            timeout=timeout,
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to complete index snapshot', exc)
    msg = (
        f'{index_name}: Snapshot to repository {repo_name} in snapshot {snap_name} '
        f'succeeded.'
    )
    logger.info(msg)

928 

929 

def update_doc(
    client: 'Elasticsearch', index: str, doc_id: str, doc: t.Dict, routing: int = 0
) -> None:
    """Upsert a document in ``index`` at ``doc_id`` with the values of ``doc``

    When ``doc_id`` is empty, a brand new document is indexed instead.

    :param client: A client connection object
    :param index: The index to write to
    :param doc_id: The document doc_id to update
    :param doc: The contents of the document
    :param routing: Because our tracking doc is using parent/child relationships, we
        need to route. We use an integer, but the API calls expect a string, so we
        manually cast this value in the API call as one.

    :raises BadClientResult: If the update/index API call fails
    """
    try:
        if not doc_id:
            logger.debug('No value for document id. Creating new document.')
            client.index(index=index, document=doc, routing=str(routing), refresh=True)
        else:
            client.update(
                index=index,
                id=doc_id,
                doc=doc,
                doc_as_upsert=True,
                routing=str(routing),
                refresh=True,
            )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Error updating document: {err.args[0]}'
        logger.error(msg)
        raise BadClientResult(msg, err)

968 

969 

def verify_index(client: 'Elasticsearch', index: str) -> bool:
    """Verify the index exists and is an index, not an alias

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: True if ``index`` resolves to exactly itself, False otherwise
    :rtype: bool
    """
    logger.debug('Verifying index: %s', index)
    try:
        response = dict(
            client.indices.get_settings(
                index=index, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        # Return immediately: previously execution fell through to
        # list(response.keys())[0] on an empty dict, raising IndexError
        return False
    logger.debug(response)
    keys = list(response.keys())
    if len(keys) > 1:
        # We have more than one key, that means we hit an alias
        logger.error('Index %s is one member of an alias.', index)
        return False
    if not keys or keys[0] != index:
        # Either nothing came back, or a 1-to-1 alias resolved to a
        # different concrete index name
        logger.error('Index %s is an alias.', index)
        return False
    return True