Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/helpers/elastic_api.py: 66%

378 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2025-01-29 19:53 -0700

1"""Functions making Elasticsearch API calls""" 

2 

3from os import getenv 

4import typing as t 

5import time 

6import logging 

7from elasticsearch8.exceptions import ( 

8 ApiError, 

9 NotFoundError, 

10 TransportError, 

11 BadRequestError, 

12) 

13from es_wait import Index, Restore, Snapshot, Task 

14from es_pii_tool.defaults import ( 

15 PAUSE_DEFAULT, 

16 PAUSE_ENVVAR, 

17 TIMEOUT_DEFAULT, 

18 TIMEOUT_ENVVAR, 

19) 

20from es_pii_tool.exceptions import ( 

21 BadClientResult, 

22 FatalError, 

23 MissingDocument, 

24 MissingError, 

25 MissingIndex, 

26 ValueMismatch, 

27) 

28from es_pii_tool.helpers.utils import build_script, check_fields, es_waiter 

29 

30if t.TYPE_CHECKING: 

31 from dotmap import DotMap # type: ignore 

32 from elasticsearch8 import Elasticsearch 

33 from elastic_transport import HeadApiResponse 

34 

# Waiter tuning knobs: overridable via environment variables, with library defaults.
PAUSE_VALUE = float(getenv(PAUSE_ENVVAR, default=PAUSE_DEFAULT))
TIMEOUT_VALUE = float(getenv(TIMEOUT_ENVVAR, default=TIMEOUT_DEFAULT))
# Common keyword arguments passed to es_waiter() calls throughout this module
WAITKW = {'pause': PAUSE_VALUE, 'timeout': TIMEOUT_VALUE}

logger = logging.getLogger(__name__)

# pylint: disable=R0913,W0707

42 

43 

def assign_alias(client: 'Elasticsearch', index_name: str, alias_name: str) -> None:
    """Add ``index_name`` to alias ``alias_name``

    :param client: A client connection object
    :param index_name: The index name
    :param alias_name: The alias name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type alias_name: str

    :raises BadClientResult: If the alias assignment API call fails
    """
    try:
        result = client.indices.put_alias(index=index_name, name=alias_name)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Attempt to assign index "{index_name}" to alias "{alias_name}" failed'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.info(
        "Index '%s' was successfully added to alias '%s'", index_name, alias_name
    )
    logger.debug(result)

56 

57 

def check_index(client: 'Elasticsearch', index_name: str, job_config: t.Dict) -> None:
    """Spot-check that documents matching the job query were actually redacted.

    :param client: A client connection object
    :param index_name: The index name
    :param job_config: The job configuration (provides ``query`` and field list)

    :raises ValueMismatch: If any field matching the query was not redacted
    """
    logger.info('Making a quick check on redacted index docs...')
    found = do_search(client, index_name, job_config['query'])
    if found['hits']['total']['value'] == 0:
        # Zero hits is treated as success: the query is assumed to match only
        # not-yet-redacted documents.
        logger.warning(
            'Query returned no results, assuming it only returns docs '
            'to be redacted and not already redacted...'
        )
        return
    if check_fields(found, job_config):
        return
    msg = 'One or more fields were not redacted. Check the logs'
    logger.error(msg)
    raise ValueMismatch(msg, 'count of fields matching query is not 0', '0')

73 

74 

def clear_cache(client: 'Elasticsearch', index_name: str) -> None:
    """Clear the cache for named index

    Errors are logged but never raised: cache clearing is best-effort.

    :param client: A client connection object
    :param index_name: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str

    :returns: No return value
    :rtype: None
    """
    logger.info('Clearing cache data for %s...', index_name)
    kwargs = {'index': index_name, 'expand_wildcards': ['open', 'hidden']}
    try:
        reply = dict(client.indices.clear_cache(**kwargs))
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error('clear_cache API call resulted in an error: %s', err)
    else:
        logger.debug(reply)

98 

99 

def close_index(client: 'Elasticsearch', name: str) -> None:
    """Close an index

    :param client: A client connection object
    :param name: The index name to close

    :type name: str

    :raises MissingIndex: If the close API call fails
    """
    try:
        result = client.indices.close(index=name, expand_wildcards=['open', 'hidden'])
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", name, err)
        raise MissingIndex(f'Index "{name}" not found', err, name)
    logger.debug(result)

113 

114 

def create_index(
    client: 'Elasticsearch',
    name: str,
    mappings: t.Union[t.Dict, None] = None,
    settings: t.Union[t.Dict, None] = None,
) -> None:
    """Create an Elasticsearch index with associated mappings and settings

    A no-op if the index already exists.

    :param client: A client connection object
    :param name: The index name
    :param mappings: The index mappings
    :param settings: The index settings

    :type name: str
    :type mappings: dict
    :type settings: dict

    :raises BadClientResult: If the create API call fails
    """
    if index_exists(client, name):
        logger.info('Index %s already exists', name)
        return
    try:
        reply = client.indices.create(index=name, settings=settings, mappings=mappings)
    except BadRequestError as err:
        # A BadRequestError here most commonly means a creation race
        logger.error("Index: '%s' already exists. Error: %s", name, err)
        raise BadClientResult(f'Index "{name}" already exists', err)
    except (ApiError, TransportError) as err:
        logger.error("Unknown error trying to create index: '%s'. Error: %s", name, err)
        raise BadClientResult(f'Unknown error trying to create index: {name}', err)
    logger.debug(reply)

145 

146 

def delete_index(client: 'Elasticsearch', name: str) -> None:
    """Delete an index

    :param client: A client connection object
    :param name: The index name to delete

    :type name: str

    :raises MissingIndex: If the delete API call fails
    """
    kwargs = {'index': name, 'expand_wildcards': ['open', 'hidden']}
    try:
        logger.debug(client.indices.delete(**kwargs))
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        raise MissingIndex(f'Index "{name}" not found', err, name)

163 

164 

def do_search(
    client: 'Elasticsearch', index_pattern: str, query: t.Dict, size: int = 10
) -> t.Dict:
    """Return search result of ``query`` against ``index_pattern``

    :param client: A client connection object
    :param index_pattern: A single index name, a csv list of indices, or other pattern
    :param query: An Elasticsearch DSL search query
    :param size: Maximum number of results to return

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_pattern: str
    :type query: dict
    :type size: int

    :raises BadClientResult: If the search API call fails
    """
    search_args = {
        'index': index_pattern,
        'query': query,
        'size': size,
        'expand_wildcards': ['open', 'hidden'],
    }
    logger.debug('Search kwargs = %s', search_args)
    try:
        result = dict(client.search(**search_args))  # type: ignore
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Attempt to collect search results yielded an exception: {err}'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.debug(result)
    return result

195 

196 

def forcemerge_index(
    client: 'Elasticsearch',
    index: t.Union[str, None] = None,
    max_num_segments: int = 1,
    only_expunge_deletes: bool = False,
) -> None:
    """
    Force Merge an index and wait for the resulting task to complete

    :param client: A client connection object
    :param index: A single index name
    :param max_num_segments: The maximum number of segments per shard after a
        force merge
    :param only_expunge_deletes: Only expunge deleted docs during force merging.
        If True, ignores max_num_segments.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type max_num_segments: int
    :type only_expunge_deletes: bool

    :raises MissingIndex: If the forcemerge API call fails
    :raises FatalError: If the forcemerge task does not complete
    """
    merge_args = {'index': index, 'wait_for_completion': False}
    # only_expunge_deletes and max_num_segments are mutually exclusive options
    if only_expunge_deletes:
        merge_args['only_expunge_deletes'] = only_expunge_deletes
    else:
        merge_args['max_num_segments'] = max_num_segments  # type: ignore
    try:
        task_info = dict(client.indices.forcemerge(**merge_args))  # type: ignore
        logger.debug(task_info)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)  # type: ignore
    logger.info('Waiting for forcemerge to complete...')
    try:
        es_waiter(client, Task, action='forcemerge', task_id=task_info['task'], **WAITKW)
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to forcemerge', exc)
    logger.info('Forcemerge completed.')

244 

245 

def generic_get(func: t.Callable, **kwargs) -> t.Dict:
    """Call ``func(**kwargs)`` and return its response as a dict

    :param func: A bound client API method to call
    :param kwargs: Keyword arguments passed straight through to ``func``

    :raises MissingError: If the call raises NotFoundError
    :raises BadClientResult: If the call raises any other API/transport error
    """
    try:
        result = dict(func(**kwargs))
    except NotFoundError as nferr:
        raise MissingError('Generic Get MissingError', nferr, nferr.info)
    except (ApiError, TransportError, BadRequestError) as err:
        raise BadClientResult('Generic Get BadClientResult Failure', err)
    logger.debug(result)
    return result

256 

257 

def get_hits(client: 'Elasticsearch', index: str, query: t.Dict) -> int:
    """Return the number of hits matching the query

    :param client: A client connection object
    :param index: The index or pattern to search
    :param query: The query to execute

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type query: dict

    :returns: The number of hits matching the query
    """
    return do_search(client, index, query)['hits']['total']['value']

273 

274 

def get_ilm(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the ILM lifecycle explanation for an index

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :raises MissingIndex: If the index does not exist

    :returns: The ILM settings object for the named index
    """
    try:
        lifecycle = dict(client.ilm.explain_lifecycle(index=index))
    except NotFoundError as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(lifecycle)
    return lifecycle

293 

294 

def get_ilm_lifecycle(client: 'Elasticsearch', policyname: str) -> t.Dict:
    """Get the ILM lifecycle settings for the named ILM policy

    :param client: A client connection object
    :param policyname: The ILM policy name to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type policyname: str

    :returns: The ILM settings object for the named policy, or an empty dict
        if the policy does not exist
    """
    try:
        return dict(client.ilm.get_lifecycle(name=policyname))
    except NotFoundError:
        logger.debug("ILM policy '%s' not found.", policyname)
        return {}

312 

313 

def get_index(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the info about an index

    :param client: A client connection object
    :param index: The index, csv indices, or index pattern to get

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :raises MissingIndex: If the get API call fails

    :returns: The index information object for the named index
    """
    kwargs = {'index': index, 'expand_wildcards': ['open', 'hidden']}
    try:
        data = dict(client.indices.get(**kwargs))
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug('Found indices: %s', list(data.keys()))
    return data

334 

335 

def get_phase(client: 'Elasticsearch', index: str) -> t.Union[str, None]:
    """Get the index's ILM phase

    :param client: A client connection object
    :param index: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The ILM phase of ``index``, or None if it cannot be determined
    """
    ilm = get_ilm(client, index)
    try:
        return ilm['indices'][index]['phase']
    except KeyError:
        # No ILM phase recorded. Perhaps in cold/frozen but not ILM affiliated
        settings = get_settings(client, index)[index]['settings']['index']
    if 'store' in settings and settings['store']['type'] == 'snapshot':
        # A mounted searchable snapshot: infer the phase from tier preference
        return get_phase_from_tier_pref(settings)
    return None

360 

361 

def get_phase_from_tier_pref(
    idx_settings: t.Dict,
) -> t.Union[t.Literal['frozen', 'cold'], None]:
    """
    Check the index's ``_tier_preference`` as an indicator which phase the index is in

    :param idx_settings: The results from a
        get_settings(index=idx)[idx]['settings']['index'] call

    :returns: The ILM phase based on the index settings, or None
    """
    tiers = ''
    try:
        allocation = idx_settings['routing']['allocation']
        tiers = allocation['include']['_tier_preference']
    except KeyError:
        # No tier preference configured at all
        pass
    if tiers == 'data_frozen':
        return 'frozen'
    # Cold indices may list multiple tiers, e.g. 'data_cold,data_warm,data_hot'
    return 'cold' if 'data_cold' in tiers.split(',') else None

382 

383 

def ilm_move(
    client: 'Elasticsearch', name: str, current_step: t.Dict, next_step: t.Dict
) -> None:
    """Move index 'name' from the current step to the next step

    :param client: A client connection object
    :param name: The index name
    :param current_step: The step the index is currently in
    :param next_step: The step to move the index to

    :raises BadClientResult: If the move_to_step call fails for any reason
    """
    try:
        client.ilm.move_to_step(
            index=name, current_step=current_step, next_step=next_step
        )
    # Deliberately broad: any failure here is converted to BadClientResult
    except Exception as err:
        msg = (
            f'Unable to move index {name} to ILM next step: {next_step}. '
            f'Error: {err}'
        )
        logger.critical(msg)
        raise BadClientResult(msg, err)

399 

400 

def modify_data_stream(
    client: 'Elasticsearch', actions: t.Sequence[t.Mapping[str, t.Any]]
) -> None:
    """Modify a data_stream using the contents of actions

    :param client: A client connection object
    :param actions: The actions to take

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type actions: dict

    :raises MissingIndex: If the modify call is rejected by the cluster
    """
    try:
        client.indices.modify_data_stream(actions=actions)
    except BadRequestError as exc:
        logger.error(
            "Unable to modify data_stream using actions='%s'. ERROR: %s", actions, exc
        )
        raise MissingIndex(
            'Missing either data_stream or index', exc, f'actions: {actions}'
        )

421 

422 

def report_segment_count(client: 'Elasticsearch', index: str) -> str:
    """
    Report the count of segments from index

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :raises BadClientResult: If the _cat shards API call fails

    :returns: Formatted message describing shard count and segment count for index
    """
    shardcount = 0
    segmentcount = 0
    try:
        output = client.cat.shards(
            index=index, format='json', h=['index', 'shard', 'prirep', 'sc']
        )
    except Exception as exc:
        logger.error('Exception: %s', exc)
        raise BadClientResult('Unable to get cat shards output', exc)
    for shard in output:
        if shard['prirep'] == 'r':  # type: ignore
            # Skip replica shards
            continue
        if index != shard['index']:  # type: ignore
            logger.warning(
                'Index name %s does not match what was returned by the _cat API: %s',
                index,
                shard['index'],  # type: ignore
            )
        shardcount += 1
        segmentcount += int(shard['sc'])  # type: ignore
        logger.debug(
            'Index %s, shard %s has %s segments',
            index,
            shard["shard"],  # type: ignore
            shard["sc"],  # type: ignore
        )
    if shardcount == 0:
        # Bug fix: previously this fell through to a ZeroDivisionError when the
        # _cat API returned no primary shards for the index/pattern.
        return f'index {index} has 0 shards and a total of 0 segments'
    return (
        f'index {index} has {shardcount} shards and a total of {segmentcount} '
        f'segments, averaging {float(segmentcount/shardcount)} segments per shard'
    )

467 

468 

def get_settings(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the settings for an index

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :raises MissingIndex: If the get_settings API call fails

    :returns: The settings object for the named index
    """
    logger.debug('Getting settings for index: %s', index)
    kwargs = {'index': index, 'expand_wildcards': ['open', 'hidden']}
    try:
        settings = dict(client.indices.get_settings(**kwargs))
        logger.debug(settings)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug('Index settings collected.')
    return settings

493 

494 

def put_settings(client: 'Elasticsearch', index: str, settings: dict) -> None:
    """Apply ``settings`` to ``index``

    :param client: A client connection object
    :param index: The index to apply settings to
    :param settings: The index settings to apply

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type settings: dict

    :raises MissingIndex: If the index does not exist
    :raises BadClientResult: If the settings are rejected
    """
    try:
        client.indices.put_settings(index=index, settings=settings)
    except NotFoundError as exc:
        logger.error("Index '%s' not found: %s", index, exc)
        raise MissingIndex('Index not found', exc, index)
    except BadRequestError as exc:
        logger.error("Bad settings: %s. ERROR: %s", settings, exc)
        raise BadClientResult(f'Invalid settings: {settings}', exc)

512 

513 

def get_progress_doc(
    client: 'Elasticsearch',
    index_name: str,
    job_id: str,
    task_id: str,
    stepname: str = '',
) -> t.Dict:
    """Get a task tracking doc

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job name string for the present redaction run
    :param task_id: The task_id string of the task we are searching for
    :param stepname: [Optional] The step name string of the step we are searching for

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str
    :type task_id: str
    :type stepname: str

    :raises MissingIndex: If the tracking index is missing
    :raises FatalError: If more than one tracking doc matches (should never happen)
    :raises MissingDocument: If no tracking doc matches

    :returns: The progress tracking document from the progress/status tracking index
        for the task or step
    """
    # Base value for stub (task)
    stub = f'Task: {task_id} of Job: {job_id}'
    # The proto query: tracking docs are children of the job doc (parent_id)
    query = {
        "bool": {
            "must": {"parent_id": {"type": "task", "id": job_id}},
            "filter": [],
        }
    }
    # The base value of the bool filter (task)
    filters = [
        {"term": {"task": task_id}},
        {"term": {"job": job_id}},
    ]
    if not stepname:
        logger.info('Tracking progress for %s', stub)
        # For Tasks progress docs, we must not match docs with a step field
        query['bool']['must_not'] = {"exists": {"field": "step"}}
    else:
        # Update stub to be for a step
        stub = f'Step: {stepname} of Task: {task_id} of Job: {job_id}'
        logger.info('Tracking progress for %s', stub)
        # Update filters to include step
        filters.append({"term": {"step": stepname}})
    # Add the filters to the query
    query['bool']['filter'] = filters  # type: ignore
    try:
        result = do_search(client, index_pattern=index_name, query=query)
    # Bug fix: do_search never raises NotFoundError -- it wraps all API errors
    # (including NotFoundError) in BadClientResult, so the original
    # `except NotFoundError` branch was dead code. Catch both to be safe.
    except (NotFoundError, BadClientResult) as err:
        msg = f'Tracking index {index_name} is missing'
        logger.critical(msg)
        raise MissingIndex(msg, err, index_name)
    # First get the edge case of multiple hits out of the way
    if result['hits']['total']['value'] > 1:
        msg = f'Tracking document for {stub} is not unique. This should never happen.'
        logger.critical(msg)
        raise FatalError(msg, ValueError())
    # After the > 1 test, if we don't have exactly 1 hit, we have zero hits
    if result['hits']['total']['value'] != 1:
        msg = f'Tracking document for {stub} does not exist'
        missing = f'A document with step: {stepname}, task: {task_id}, job: {job_id}'
        logger.debug(msg)
        raise MissingDocument(msg, Exception(), missing)
    # There can be only one...
    return result['hits']['hits'][0]

583 

584 

def get_tracking_doc(client: 'Elasticsearch', index_name: str, job_id: str) -> t.Dict:
    """Get the progress/status tracking doc for the provided job_id

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job_id string for the present redaction run

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str

    :raises MissingIndex: If the tracking index does not exist
    :raises MissingDocument: If no document exists for ``job_id``

    :returns: The tracking document from the progress/status tracking index
    """
    if not index_exists(client, index_name):
        msg = f'Tracking index {index_name} is missing'
        logger.critical(msg)
        raise MissingIndex(msg, Exception(), index_name)
    try:
        tracking = dict(client.get(index=index_name, id=job_id))
    except NotFoundError as exc:
        msg = f'Tracking document for job_id {job_id} does not exist'
        logger.debug(msg)
        raise MissingDocument(msg, exc, job_id)
    return tracking['_source']

610 

611 

def index_exists(client: 'Elasticsearch', index_name: str) -> 'HeadApiResponse':
    """Test whether index ``index_name`` exists

    :param client: A client connection object
    :param index_name: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str

    :returns: ``HeadApiResponse(True)`` if ``index_name`` exists, otherwise
        ``HeadApiResponse(False)``
    """
    kwargs = {'index': index_name, 'expand_wildcards': ['open', 'hidden']}
    return client.indices.exists(**kwargs)

625 

626 

def job_exists(
    client: 'Elasticsearch', index_name: str, job_id: str
) -> 'HeadApiResponse':
    """Test whether a document exists for the present job_id

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job_id string for the present redaction run

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str

    :returns: ``HeadApiResponse(True)`` if a document exists with the present
        ``job_id`` exists in ``index_name``, otherwise ``HeadApiResponse(False)``
    """
    response = client.exists(index=index_name, id=job_id)
    return response

644 

645 

def mount_index(var: 'DotMap') -> None:
    """Mount index as a searchable snapshot

    :param var: A collection of variables from
        :py:attr:`~.es_pii_tool.redacters.snapshot.RedactSnapshot.var`

    :type var: DotMap

    :raises BadClientResult: If the mount API call fails
    :raises FatalError: If the mounted index never reaches a healthy state
    """
    response = {}
    msg = (
        f'Mounting {var.redaction_target} renamed as {var.mount_name} '
        f'from repository: {var.repository}, snapshot: {var.new_snap_name} '
        f'with storage={var.storage}'
    )
    logger.debug(msg)
    # Remove any pre-existing index with the target name before mounting
    while index_exists(var.client, var.mount_name):
        logger.warning('Index %s exists. Deleting before remounting', var.mount_name)
        delete_index(var.client, var.mount_name)
        time.sleep(3.0)
    try:
        response = dict(
            var.client.searchable_snapshots.mount(
                repository=var.repository,
                snapshot=var.new_snap_name,
                index=var.redaction_target,
                renamed_index=var.mount_name,
                storage=var.storage,
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Attempt to mount index '%s' failed: %s", var.mount_name, err)
        logger.debug(response)
        raise BadClientResult('Error when mount index attempted', err)
    logger.info('Ensuring searchable snapshot mount is in "green" health state...')
    try:
        # NOTE(review): uses a fixed 30s timeout rather than the module-wide
        # TIMEOUT_VALUE/WAITKW used elsewhere -- presumably intentional; confirm.
        es_waiter(
            var.client,
            Index,
            action='mount',
            index=var.mount_name,
            pause=PAUSE_VALUE,
            timeout=30.0,
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to mount index from snapshot', exc)
    # Bug fix: corrected 'succesfully' typo in the log message
    logger.info("Index '%s' mounted from snapshot successfully", var.mount_name)

693 

694 

def resolve_index(client: 'Elasticsearch', index: str) -> t.Dict:
    """Resolve an index

    :param client: A client connection object
    :param index: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :raises MissingIndex: If the resolve API call fails

    :returns: The return value from
        :py:meth:`~.elasticsearch.Elasticsearch.IndicesClient.resolve_index`
    :rtype: dict
    """
    logger.debug('Resolving index: %s', index)
    kwargs = {'name': index, 'expand_wildcards': ['open', 'hidden']}
    try:
        resolved = dict(client.indices.resolve_index(**kwargs))
        logger.debug(resolved)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug('Index resolved.')
    return resolved

721 

722 

def restore_index(
    client: 'Elasticsearch',
    repo_name: str,
    snap_name: str,
    index_name: str,
    replacement: str,
    re_pattern: str = '(.+)',
    index_settings: t.Union[str, None] = None,
) -> None:
    """Restore an index from a snapshot, wait for completion, and verify health

    :param client: A client connection object
    :param repo_name: The repository name
    :param snap_name: The snapshot name
    :param index_name: The index name as it appears in the snapshot metadata
    :param replacement: The name or substitution string to use as the restored index
        name
    :param re_pattern: The optional rename pattern for use with ``replacement``
    :param index_settings: Any settings to apply to the restored index, such as
        _tier_preference

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repo_name: str
    :type snap_name: str
    :type index_name: str
    :type replacement: str
    :type re_pattern: str
    :type index_settings: dict

    :raises BadClientResult: If the restore API call or the restore wait fails
    :raises ValueMismatch: If the restored index ends up in 'red' health
    """
    # Log-only summary of the restore arguments, assembled before the API call
    msg = (
        f"repository={repo_name}, snapshot={snap_name}, indices={index_name},"
        f"include_aliases=False,"
        f"ignore_index_settings=["
        f" 'index.lifecycle.name', 'index.lifecycle.rollover_alias',"
        f" 'index.routing.allocation.include._tier_preference'],"
        f"index_settings={index_settings},"
        f"rename_pattern={re_pattern},"
        f"rename_replacement={replacement},"
        f"wait_for_completion=False"
    )
    logger.debug('RESTORE settings: %s', msg)
    try:
        # ILM and tier-preference settings are deliberately stripped so the
        # restored index does not re-attach to its old lifecycle policy
        response = client.snapshot.restore(
            repository=repo_name,
            snapshot=snap_name,
            indices=index_name,
            include_aliases=False,
            ignore_index_settings=[
                'index.lifecycle.name',
                'index.lifecycle.rollover_alias',
                'index.routing.allocation.include._tier_preference',
            ],
            index_settings=index_settings,  # type: ignore
            rename_pattern=re_pattern,
            rename_replacement=replacement,
            wait_for_completion=False,
        )
        logger.debug('Response = %s', response)
        logger.info('Checking if restoration completed...')
        try:
            # Poll until the renamed index finishes restoring
            es_waiter(client, Restore, index_list=[replacement], **WAITKW)
        except BadClientResult as bad:
            logger.error('Exception: %s', bad)
            # BadClientResult is not in the outer except tuple, so this
            # re-raise propagates past the enclosing try block unchanged
            raise BadClientResult('Failed to restore index from snapshot', bad)
        msg = f'Restoration of index {index_name} as {replacement} complete'
        logger.info(msg)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = (
            f'Restoration of index {index_name} as {replacement} yielded an error: '
            f'{err}'
        )
        logger.error(msg)
        raise BadClientResult(msg, err)
    # verify index is green
    logger.info('Ensuring restored index is in "green" health state...')
    res = dict(client.cluster.health(index=replacement, filter_path='status'))
    logger.debug('res = %s', res)
    # Only 'red' is fatal; 'yellow' (unassigned replicas) is acceptable here
    if res['status'] == 'red':
        msg = f'Restored index {replacement} is not in a healthy state'
        logger.error(msg)
        raise ValueMismatch(msg, 'index health is "red"', 'green or yellow')

804 

805 

def redact_from_index(client: 'Elasticsearch', index_name: str, config: t.Dict) -> None:
    """Redact data from an index using a painless script.

    Collects the task_id and waits for the reindexing job to complete before
    returning.

    :param client: A client connection object
    :param index_name: The index to act on
    :param config: The config block being iterated. Contains ``query``, ``message``,
        and ``fields``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type config: dict

    :raises FatalError: If the update_by_query call or its task fails
    """
    logger.debug('Begin redaction...')
    logger.info('Before update by query, %s', report_segment_count(client, index_name))
    logger.debug('Updating and redacting data...')
    redact_script = build_script(config['message'], config['fields'])
    task_info = {}
    try:
        task_info = dict(
            client.update_by_query(
                index=index_name,
                script=redact_script,
                query=config['query'],
                wait_for_completion=False,
                expand_wildcards=['open', 'hidden'],
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.critical('update_by_query yielded an error: %s', err)
        raise FatalError('update_by_query API call failed', err)
    logger.debug('Checking update by query status...')
    logger.debug('response = %s', task_info)
    try:
        es_waiter(
            client, Task, action='update_by_query', task_id=task_info['task'], **WAITKW
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to complete update by query', exc)
    logger.info('After update by query, %s', report_segment_count(client, index_name))
    logger.debug('Update by query completed.')

851 

852 

def remove_ilm_policy(client: 'Elasticsearch', index: str) -> t.Dict:
    """Remove any ILM policy associated with index

    :param client: A client connection object
    :param index: The index

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :raises MissingIndex: If the remove_policy API call fails

    :returns: The response, e.g. ``{'has_failures': False, 'failed_indexes': []}``
    """
    try:
        removal = dict(client.ilm.remove_policy(index=index))
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(removal)
    return removal

871 

872 

def take_snapshot(
    client: 'Elasticsearch', repo_name: str, snap_name: str, index_name: str
) -> None:
    """
    Take snapshot of index and wait for it to complete

    :param client: A client connection object
    :param repo_name: The repository name
    :param snap_name: The snapshot name
    :param index_name: The name of the index to snapshot

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repo_name: str
    :type snap_name: str
    :type index_name: str

    :raises BadClientResult: If the snapshot create API call fails
    :raises FatalError: If the snapshot never completes
    """
    logger.info('Creating new snapshot...')
    try:
        reply = dict(
            client.snapshot.create(
                repository=repo_name,
                snapshot=snap_name,
                indices=index_name,
                wait_for_completion=False,
            )
        )
        logger.debug('Snapshot response: %s', reply)
    except (ApiError, NotFoundError, TransportError, BadRequestError, KeyError) as err:
        msg = f'Creation of snapshot "{snap_name}" resulted in an error: {err}'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.info('Checking on status of snapshot...')
    try:
        es_waiter(client, Snapshot, snapshot=snap_name, repository=repo_name, **WAITKW)
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to complete index snapshot', exc)
    msg = (
        f'{index_name}: Snapshot to repository {repo_name} in snapshot {snap_name} '
        f'succeeded.'
    )
    logger.info(msg)

921 

922 

def update_doc(
    client: 'Elasticsearch', index: str, doc_id: str, doc: t.Dict, routing: int = 0
) -> None:
    """Upsert a document in ``index`` at ``doc_id`` with the values of ``doc``

    Creates a new document instead when ``doc_id`` is empty.

    :param client: A client connection object
    :param index: The index to write to
    :param doc_id: The document doc_id to update
    :param doc: The contents of the document
    :param routing: Because our tracking doc is using parent/child relationships, we
        need to route. We use an integer, but the API calls expect a string, so we
        manually cast this value in the API call as one.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type doc_id: str
    :type doc: dict
    :type routing: int

    :raises BadClientResult: If the update or index API call fails
    """
    # Common kwargs: route to the parent shard and refresh so reads see the write
    shared = {'routing': str(routing), 'refresh': True}
    try:
        if not doc_id:
            logger.debug('No value for document id. Creating new document.')
            _ = client.index(index=index, document=doc, **shared)
        else:
            _ = client.update(
                index=index, id=doc_id, doc=doc, doc_as_upsert=True, **shared
            )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Error updating document: {err.args[0]}'
        logger.error(msg)
        raise BadClientResult(msg, err)

961 

962 

def verify_index(client: 'Elasticsearch', index: str) -> bool:
    """Verify the index exists and is an index, not an alias

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: True if ``index`` is a real, concrete index; False otherwise
    """
    logger.debug('Verifying index: %s', index)
    try:
        response = dict(
            client.indices.get_settings(
                index=index, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        # Bug fix: the original fell through with an empty response here and
        # then crashed with IndexError on list(response.keys())[0]
        return False
    logger.debug(response)
    keys = list(response.keys())
    if not keys:
        # Bug fix: guard against an empty (but successful) response, which
        # would also have raised IndexError below
        return False
    if len(keys) > 1:
        # We have more than one key, that means we hit an alias
        logger.error('Index %s is one member of an alias.', index)
        return False
    if keys[0] != index:
        # There's a 1 to 1 alias, but it is not the index name
        logger.error('Index %s is an alias.', index)
        return False
    return True