Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/helpers/elastic_api.py: 66%

349 statements  

coverage.py v7.5.0, created at 2024-10-01 16:39 -0600

1"""Functions making Elasticsearch API calls""" 

2 

3from os import getenv 

4import typing as t 

5import logging 

6from elasticsearch8.exceptions import ( 

7 ApiError, 

8 NotFoundError, 

9 TransportError, 

10 BadRequestError, 

11) 

12from es_wait import Index, Restore, Snapshot, Task 

13from es_pii_tool.defaults import ( 

14 PAUSE_DEFAULT, 

15 PAUSE_ENVVAR, 

16 TIMEOUT_DEFAULT, 

17 TIMEOUT_ENVVAR, 

18) 

19from es_pii_tool.exceptions import ( 

20 BadClientResult, 

21 FatalError, 

22 MissingDocument, 

23 MissingError, 

24 MissingIndex, 

25 ValueMismatch, 

26) 

27from es_pii_tool.helpers.utils import build_script, check_fields, es_waiter 

28 

29if t.TYPE_CHECKING: 

30 from dotmap import DotMap # type: ignore 

31 from elasticsearch8 import Elasticsearch 

32 from elastic_transport import HeadApiResponse 

33 

34PAUSE_VALUE = float(getenv(PAUSE_ENVVAR, default=PAUSE_DEFAULT)) 

35TIMEOUT_VALUE = float(getenv(TIMEOUT_ENVVAR, default=TIMEOUT_DEFAULT)) 

36WAITKW = {'pause': PAUSE_VALUE, 'timeout': TIMEOUT_VALUE} 
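# Note: PAUSE_VALUE and TIMEOUT_VALUE are resolved once, at import time, from the
# environment variables named by PAUSE_ENVVAR and TIMEOUT_ENVVAR. A minimal sketch of
# overriding them (the values shown are hypothetical; units are assumed to be seconds):
#
#     import os
#     from es_pii_tool.defaults import PAUSE_ENVVAR, TIMEOUT_ENVVAR
#     os.environ[PAUSE_ENVVAR] = '5'      # hypothetical pause between waiter polls
#     os.environ[TIMEOUT_ENVVAR] = '300'  # hypothetical overall waiter timeout
#     from es_pii_tool.helpers import elastic_api  # import only after the env is set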

37 

38logger = logging.getLogger(__name__) 

39 

40# pylint: disable=R0913,W0707 

41 

42 

43def assign_alias(client: 'Elasticsearch', index_name: str, alias_name: str) -> None: 

44 """Assign index to alias(es)""" 

45 try: 

46 response = client.indices.put_alias(index=index_name, name=alias_name) 

47 logger.info( 

48 "Index '%s' was successfully added to alias '%s'", index_name, alias_name 

49 ) 

50 logger.debug(response) 

51 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

52 msg = f'Attempt to assign index "{index_name}" to alias "{alias_name}" failed' 

53 logger.critical(msg) 

54 raise BadClientResult(msg, err) 

55 

56 

57def check_index(client: 'Elasticsearch', index_name: str, job_config: t.Dict) -> None: 

58 """Check the index""" 

59 logger.info('Making a quick check on redacted index docs...') 

60 result = do_search(client, index_name, job_config['query']) 

61 if result['hits']['total']['value'] == 0: 

62 logger.warning( 

63 'Query returned no results; assuming the query matches only docs '

64 'still needing redaction, not docs already redacted...' 

65 ) 

66 return 

67 success = check_fields(result, job_config) 

68 if not success: 

69 msg = 'One or more fields were not redacted. Check the logs' 

70 logger.error(msg) 

71 raise ValueMismatch(msg, 'count of fields matching query is not 0', '0') 

72 

73 

74def clear_cache(client: 'Elasticsearch', index_name: str) -> None: 

75 """Clear the cache for named index 

76 

77 :param client: A client connection object 

78 :param index_name: The index name 

79 

80 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

81 :type index_name: str 

82 

83 :returns: No return value 

84 :rtype: None 

85 """ 

86 response = {} 

87 logger.info('Clearing cache data for %s...', index_name) 

88 try: 

89 response = dict( 

90 client.indices.clear_cache( 

91 index=index_name, expand_wildcards=['open', 'hidden'] 

92 ) 

93 ) 

94 logger.debug(response) 

95 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

96 logger.error('clear_cache API call resulted in an error: %s', err) 

97 

98 

99def close_index(client: 'Elasticsearch', name: str) -> None: 

100 """Close an index 

101 

102 :param name: The index name to close 

103 

104 :type name: str 

105 """ 

106 try: 

107 response = client.indices.close(index=name, expand_wildcards=['open', 'hidden']) 

108 logger.debug(response) 

109 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

110 logger.error("Index: '%s' not found. Error: %s", name, err) 

111 raise MissingIndex(f'Index "{name}" not found', err, name) 

112 

113 

114def create_index( 

115 client: 'Elasticsearch', 

116 name: str, 

117 mappings: t.Union[t.Dict, None] = None, 

118 settings: t.Union[t.Dict, None] = None, 

119) -> None: 

120 """Create an Elasticsearch index with associated mappings and settings 

121 

122 :param name: The index name 

123 :param mappings: The index mappings 

124 :param settings: The index settings 

125 

126 :type name: str 

127 :type mappings: dict 

128 :type settings: dict 

129 """ 

130 if index_exists(client, name): 

131 logger.info('Index %s already exists', name) 

132 return 

133 try: 

134 response = client.indices.create( 

135 index=name, settings=settings, mappings=mappings 

136 ) 

137 logger.debug(response) 

138 except BadRequestError as err: 

139 logger.error("Index: '%s' already exists. Error: %s", name, err) 

140 raise BadClientResult(f'Index "{name}" already exists', err) 

141 except (ApiError, TransportError) as err: 

142 logger.error("Unknown error trying to create index: '%s'. Error: %s", name, err) 

143 raise BadClientResult(f'Unknown error trying to create index: {name}', err) 

144 

145 

146def delete_index(client: 'Elasticsearch', name: str) -> None: 

147 """Delete an index 

148 

149 :param client: A client connection object 

150 :param name: The index name to delete 

151 

152 :type name: str 

153 """ 

154 try: 

155 response = client.indices.delete( 

156 index=name, expand_wildcards=['open', 'hidden'] 

157 ) 

158 logger.debug(response) 

159 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

160 # logger.error("Index: '%s' not found. Error: %s", name, err) 

161 raise MissingIndex(f'Index "{name}" not found', err, name) 

162 

163 

164def do_search( 

165 client: 'Elasticsearch', index_pattern: str, query: t.Dict, size: int = 10 

166) -> t.Dict: 

167 """Return search result of ``query`` against ``index_pattern`` 

168 

169 :param client: A client connection object 

170 :param index_pattern: A single index name, a csv list of indices, or other pattern 

171 :param query: An Elasticsearch DSL search query 

172 :param size: Maximum number of results to return 

173 

174 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

175 :type index_pattern: str 

176 :type query: dict 

177 :type size: int 

178 """ 

179 try: 

180 response = dict( 

181 client.search( 

182 index=index_pattern, 

183 query=query, 

184 size=size, 

185 expand_wildcards=['open', 'hidden'], 

186 ) 

187 ) 

188 logger.debug(response) 

189 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

190 msg = f'Attempt to collect search results yielded an exception: {err}' 

191 logger.critical(msg) 

192 raise BadClientResult(msg, err) 

193 return response 

194 

195 

196def forcemerge_index( 

197 client: 'Elasticsearch', 

198 index: t.Union[str, None] = None, 

199 max_num_segments: int = 1, 

200 only_expunge_deletes: bool = False, 

201) -> None: 

202 """ 

203 Force Merge an index 

204 

205 :param client: A client connection object 

206 :param index: A single index name 

207 :param max_num_segments: The maximum number of segments per shard after a 

208 force merge 

209 :param only_expunge_deletes: Only expunge deleted docs during force merging. 

210 If True, ignores max_num_segments. 

211 

212 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

213 :type index: str 

214 :type max_num_segments: int 

215 :type only_expunge_deletes: bool 

216 """ 

217 kwargs = {'index': index, 'wait_for_completion': False} 

218 if only_expunge_deletes: 

219 kwargs.update({'only_expunge_deletes': only_expunge_deletes}) 

220 else: 

221 kwargs.update({'max_num_segments': max_num_segments}) # type: ignore 

222 try: 

223 response = dict(client.indices.forcemerge(**kwargs)) # type: ignore 

224 logger.debug(response) 

225 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

226 logger.error("Index: '%s' not found. Error: %s", index, err) 

227 raise MissingIndex(f'Index "{index}" not found', err, index) # type: ignore 

228 logger.info('Waiting for forcemerge to complete...') 

229 # task_check = Task( 

230 # client, 

231 # action='forcemerge', 

232 # task_id=response['task'], 

233 # pause=PAUSE_VALUE, 

234 # timeout=TIMEOUT_VALUE, 

235 # ) 

236 try: 

237 # task_check.wait() 

238 es_waiter(client, Task, action='forcemerge', task_id=response['task'], **WAITKW) 

239 except BadClientResult as exc: 

240 logger.error('Exception: %s', exc) 

241 raise FatalError('Failed to forcemerge', exc) 

242 logger.info('Forcemerge completed.') 

243 

244 

245def generic_get(func: t.Callable, **kwargs) -> t.Dict: 

246 """Generic, reusable client request getter""" 

247 try: 

248 response = dict(func(**kwargs)) 

249 logger.debug(response) 

250 except NotFoundError as nferr: 

251 raise MissingError('Generic Get MissingError', nferr, nferr.info) 

252 except (ApiError, TransportError, BadRequestError) as err: 

253 raise BadClientResult('Generic Get BadClientResult Failure', err) 

254 return response 

255 

256 

257def get_hits(client: 'Elasticsearch', index: str, query: t.Dict) -> int: 

258 """Return the number of hits matching the query 

259 

260 :param client: A client connection object 

261 :param index: The index or pattern to search 

262 :param query: The query to execute 

263 

264 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

265 :type index: str 

266 :type query: dict 

267 

268 :returns: The number of hits matching the query 

269 """ 

270 result = do_search(client, index, query) 

271 return result['hits']['total']['value'] 

272 

273 

274def get_ilm(client: 'Elasticsearch', index: str) -> t.Dict: 

275 """Get the ILM lifecycle settings for an index 

276 

277 :param client: A client connection object 

278 :param index: The index to check 

279 

280 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

281 :type index: str 

282 

283 :returns: The ILM settings object for the named index 

284 """ 

285 try: 

286 response = dict(client.ilm.explain_lifecycle(index=index)) 

287 logger.debug(response) 

288 except NotFoundError as err: 

289 logger.error("Index: '%s' not found. Error: %s", index, err) 

290 raise MissingIndex(f'Index "{index}" not found', err, index) 

291 return response 

292 

293 

294def get_ilm_lifecycle(client: 'Elasticsearch', policyname: str) -> t.Dict: 

295 """Get the ILM lifecycle settings for an policyname 

296 

297 :param client: A client connection object 

298 :param policyname: The ILM policy name to check 

299 

300 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

301 :type policyname: str 

302 

303 :returns: The ILM settings object for the named policy, or an empty dict if the policy does not exist 

304 """ 

305 retval = {} 

306 try: 

307 retval = dict(client.ilm.get_lifecycle(name=policyname)) 

308 except NotFoundError: 

309 logger.debug("ILM policy '%s' not found.", policyname) 

310 return retval 

311 

312 

313def get_index(client: 'Elasticsearch', index: str) -> t.Dict: 

314 """Get the info about an index 

315 

316 :param client: A client connection object 

317 :param index: The index, csv indices, or index pattern to get 

318 

319 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

320 :type index: str 

321 

322 :returns: The index information object for the named index 

323 """ 

324 try: 

325 response = dict( 

326 client.indices.get(index=index, expand_wildcards=['open', 'hidden']) 

327 ) 

328 logger.debug('Found indices: %s', list(response.keys())) 

329 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

330 logger.error("Index: '%s' not found. Error: %s", index, err) 

331 raise MissingIndex(f'Index "{index}" not found', err, index) 

332 return response 

333 

334 

335def get_phase(client: 'Elasticsearch', index: str) -> t.Union[str, None]: 

336 """Get the index's ILM phase 

337 

338 :param client: A client connection object 

339 :param index: The index name 

340 

341 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

342 :type index: str 

343 

344 :returns: The ILM phase of ``index`` 

345 """ 

346 phase = None 

347 ilm = get_ilm(client, index) 

348 try: 

349 phase = ilm['indices'][index]['phase'] 

350 except KeyError: # Perhaps in cold/frozen but not ILM affiliated 

351 settings = get_settings(client, index)[index]['settings']['index'] 

352 if "store" in settings: 

353 # Checking if it's a mounted searchable snapshot 

354 if settings["store"]["type"] == "snapshot": 

355 phase = get_phase_from_tier_pref(settings) 

356 else: 

357 phase = None 

358 return phase 

359 

360 

361def get_phase_from_tier_pref( 

362 idx_settings: t.Dict, 

363) -> t.Union[t.Literal['frozen', 'cold'], None]: 

364 """ 

365 Check the index's ``_tier_preference`` as an indicator which phase the index is in 

366 

367 :param idx_settings: The results from a 

368 get_settings(index=idx)[idx]['settings']['index'] call 

369 

370 :returns: The ILM phase based on the index settings, or None 

371 """ 

372 try: 

373 tiers = idx_settings['routing']['allocation']['include']['_tier_preference'] 

374 except KeyError: 

375 tiers = '' 

376 if tiers == 'data_frozen': 

377 return 'frozen' 

378 if 'data_cold' in tiers.split(','): 

379 return 'cold' 

380 return None 

381 

382 

383def ilm_move( 

384 client: 'Elasticsearch', name: str, current_step: t.Dict, next_step: t.Dict 

385) -> None: 

386 """Move index 'name' from the current step to the next step""" 

387 try: 

388 client.ilm.move_to_step( 

389 index=name, current_step=current_step, next_step=next_step 

390 ) 

391 except Exception as err: 

392 msg = ( 

393 f'Unable to move index {name} to ILM next step: {next_step}. ' 

394 f'Error: {err}' 

395 ) 

396 logger.critical(msg) 

397 raise BadClientResult(msg, err) 

398 

399 

400def modify_data_stream( 

401 client: 'Elasticsearch', actions: t.Sequence[t.Mapping[str, t.Any]] 

402) -> None: 

403 """Modify a data_stream using the contents of actions 

404 

405 :param client: A client connection object 

406 :param actions: The actions to take 

407 

408 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

409 :type actions: dict 

410 """ 

411 try: 

412 client.indices.modify_data_stream(actions=actions) 

413 except BadRequestError as exc: 

414 logger.error( 

415 "Unable to modify data_stream using actions='%s'. ERROR: %s", actions, exc 

416 ) 

417 raise MissingIndex( 

418 'Missing either data_stream or index', exc, f'actions: {actions}' 

419 ) 

420 

421 

422def report_segment_count(client: 'Elasticsearch', index: str) -> str: 

423 """ 

424 Report the count of segments from index 

425 

426 :param client: A client connection object 

427 :param index: The index to check 

428 

429 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

430 :type index: str 

431 

432 :returns: Formatted message describing shard count and segment count for index 

433 """ 

434 shardcount = 0 

435 segmentcount = 0 

436 try: 

437 output = client.cat.shards( 

438 index=index, format='json', h=['index', 'shard', 'prirep', 'sc'] 

439 ) 

440 except Exception as exc: 

441 logger.error('Exception: %s', exc) 

442 raise BadClientResult('Unable to get cat shards output', exc) 

443 for shard in output: 

444 if shard['prirep'] == 'r': # type: ignore 

445 # Skip replica shards 

446 continue 

447 if index != shard['index']: # type: ignore 

448 logger.warning( 

449 'Index name %s does not match what was returned by the _cat API: %s', 

450 index, 

451 shard['index'], # type: ignore 

452 ) 

453 shardcount += 1 

454 segmentcount += int(shard['sc']) # type: ignore 

455 logger.debug( 

456 'Index %s, shard %s has %s segments', 

457 index, 

458 shard["shard"], # type: ignore 

459 shard["sc"], # type: ignore 

460 ) 

461 

462 return ( 

463 f'index {index} has {shardcount} shards and a total of {segmentcount} ' 

464 f'segments, averaging {float(segmentcount/shardcount)} segments per shard' 

465 ) 

466 

467 

468def get_settings(client: 'Elasticsearch', index: str) -> t.Dict: 

469 """Get the settings for an index 

470 

471 :param client: A client connection object 

472 :param index: The index to check 

473 

474 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

475 :type index: str 

476 

477 :returns: The settings object for the named index 

478 """ 

479 logger.debug('Getting settings for index: %s', index) 

480 try: 

481 response = dict( 

482 client.indices.get_settings( 

483 index=index, expand_wildcards=['open', 'hidden'] 

484 ) 

485 ) 

486 logger.debug(response) 

487 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

488 logger.error("Index: '%s' not found. Error: %s", index, err) 

489 raise MissingIndex(f'Index "{index}" not found', err, index) 

490 logger.debug('Index settings collected.') 

491 return response 

492 

493 

494def put_settings(client: 'Elasticsearch', index: str, settings: dict) -> None: 

495 """Modify a data_stream using the contents of actions 

496 

497 :param client: A client connection object 

498 :param settings: The index settings to apply 

499 

500 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

501 :type settings: dict 

502 """ 

503 try: 

504 client.indices.put_settings(index=index, settings=settings) 

505 except NotFoundError as exc: 

506 logger.error("Index '%s' not found: %s", index, exc) 

507 raise MissingIndex('Index not found', exc, index) 

508 except BadRequestError as exc: 

509 logger.error("Bad settings: %s. ERROR: %s", settings, exc) 

510 raise BadClientResult(f'Invalid settings: {settings}', exc) 

511 

512 

513def get_task_doc( 

514 client: 'Elasticsearch', index_name: str, job_id: str, task_id: str 

515) -> t.Dict: 

516 """Get a task tracking doc 

517 

518 :param client: A client connection object 

519 :param index_name: The index name 

520 :param job_id: The job_id string for the present redaction run 

521 :param task_id: The task_id string of the task we are searching for 

522 

523 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

524 :type index_name: str 

525 :type job_id: str 

526 :type task_id: str 

527 

528 :returns: The task tracking document from the progress/status tracking index 

529 """ 

530 query = { 

531 "bool": { 

532 "must": {"parent_id": {"type": "task", "id": job_id}}, 

533 "filter": [{"term": {"task": task_id}}], 

534 } 

535 } 

536 try: 

537 result = do_search(client, index_pattern=index_name, query=query) 

538 except NotFoundError as err: 

539 msg = f'Tracking index {index_name} is missing' 

540 logger.critical(msg) 

541 raise MissingIndex(msg, err, index_name) 

542 if result['hits']['total']['value'] != 1: 

543 msg = f'Tracking document for job: {job_id}, task: {task_id} does not exist' 

544 raise MissingDocument(msg, Exception(), msg) 

545 return result['hits']['hits'][0] 

546 

547 

548def get_tracking_doc(client: 'Elasticsearch', index_name: str, job_id: str) -> t.Dict: 

549 """Get the progress/status tracking doc 

550 

551 :param client: A client connection object 

552 :param index_name: The index name 

553 :param job_id: The job_id string for the present redaction run 

554 

555 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

556 :type index_name: str 

557 :type job_id: str 

558 

559 :returns: The tracking document from the progress/status tracking index 

560 """ 

561 if not index_exists(client, index_name): 

562 msg = f'Tracking index {index_name} is missing' 

563 logger.critical(msg) 

564 raise MissingIndex(msg, Exception(), index_name) 

565 try: 

566 doc = dict(client.get(index=index_name, id=job_id)) 

567 # logger.debug('TRACKING DOC = %s', doc) 

568 except NotFoundError as exc: 

569 msg = f'Tracking document for job_id {job_id} does not exist' 

570 logger.debug(msg) 

571 raise MissingDocument(msg, exc, job_id) 

572 return doc['_source'] 

573 

574 

575def index_exists(client: 'Elasticsearch', index_name: str) -> 'HeadApiResponse': 

576 """Test whether index ``index_name`` exists 

577 

578 :param client: A client connection object 

579 :param index_name: The index name 

580 

581 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

582 :type index_name: str 

583 

584 :returns: ``HeadApiResponse(True)`` if ``index_name`` exists, otherwise 

585 ``HeadApiResponse(False)`` 

586 """ 

587 return client.indices.exists(index=index_name, expand_wildcards=['open', 'hidden']) 

588 

589 

590def job_exists( 

591 client: 'Elasticsearch', index_name: str, job_id: str 

592) -> 'HeadApiResponse': 

593 """Test whether a document exists for the present job_id 

594 

595 :param client: A client connection object 

596 :param index_name: The index name 

597 :param job_id: The job_id string for the present redaction run 

598 

599 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

600 :type index_name: str 

601 :type job_id: str 

602 

603 :returns: ``HeadApiResponse(True)`` if a document with the present 

604 ``job_id`` exists in ``index_name``, otherwise ``HeadApiResponse(False)`` 

605 """ 

606 return client.exists(index=index_name, id=job_id) 

607 

608 

609def mount_index(var: 'DotMap') -> None: 

610 """Mount index as a searchable snapshot 

611 

612 :param var: A collection of variables from 

613 :py:attr:`~.es_pii_tool.redacters.snapshot.RedactSnapshot.var` 

614 

615 :type var: DotMap 

616 """ 

617 response = {} 

618 msg = ( 

619 f'Mounting {var.redaction_target} renamed as {var.mount_name} ' 

620 f'from repository: {var.repository}, snapshot: {var.new_snap_name} ' 

621 f'with storage={var.storage}' 

622 ) 

623 logger.debug(msg) 

624 try: 

625 response = dict( 

626 var.client.searchable_snapshots.mount( 

627 repository=var.repository, 

628 snapshot=var.new_snap_name, 

629 index=var.redaction_target, 

630 renamed_index=var.mount_name, 

631 storage=var.storage, 

632 ) 

633 ) 

634 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

635 logger.error("Attempt to mount index '%s' failed: %s", var.mount_name, err) 

636 logger.debug(response) 

637 raise BadClientResult('Error when mount index attempted', err) 

638 logger.info('Ensuring searchable snapshot mount is in "green" health state...') 

639 try: 

640 es_waiter( 

641 var.client, 

642 Index, 

643 action='mount', 

644 index=var.mount_name, 

645 pause=PAUSE_VALUE, 

646 timeout=30.0, 

647 ) 

648 except BadClientResult as exc: 

649 logger.error('Exception: %s', exc) 

650 raise FatalError('Failed to mount index from snapshot', exc) 

651 logger.info("Index '%s' mounted from snapshot succesfully", var.mount_name) 

652 

653 

654def resolve_index(client: 'Elasticsearch', index: str) -> t.Dict: 

655 """Resolve an index 

656 

657 :param client: A client connection object 

658 :param index: The index name 

659 

660 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

661 :type index: str 

662 

663 :returns: The return value from 

664 :py:meth:`~.elasticsearch.Elasticsearch.IndicesClient.resolve_index` 

665 :rtype: dict 

666 """ 

667 logger.debug('Resolving index: %s', index) 

668 try: 

669 response = dict( 

670 client.indices.resolve_index( 

671 name=index, expand_wildcards=['open', 'hidden'] 

672 ) 

673 ) 

674 logger.debug(response) 

675 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

676 logger.error("Index: '%s' not found. Error: %s", index, err) 

677 raise MissingIndex(f'Index "{index}" not found', err, index) 

678 logger.debug('Index resolved.') 

679 return response 

680 

681 

682def restore_index( 

683 client: 'Elasticsearch', 

684 repo_name: str, 

685 snap_name: str, 

686 index_name: str, 

687 replacement: str, 

688 re_pattern: str = '(.+)', 

689 index_settings: t.Union[str, None] = None, 

690) -> None: 

691 """Restore an index 

692 

693 :param client: A client connection object 

694 :param repo_name: The repository name 

695 :param snap_name: The snapshot name 

696 :param index_name: The index name as it appears in the snapshot metadata 

697 :param replacement: The name or substitution string to use as the restored index 

698 name 

699 :param re_pattern: The optional rename pattern for use with ``replacement`` 

700 :param index_settings: Any settings to apply to the restored index, such as 

701 _tier_preference 

702 

703 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

704 :type repo_name: str 

705 :type snap_name: str 

706 :type index_name: str 

707 :type replacement: str 

708 :type re_pattern: str 

709 :type index_settings: dict 

710 """ 

711 msg = ( 

712 f"repository={repo_name}, snapshot={snap_name}, indices={index_name}," 

713 f"include_aliases=False," 

714 f"ignore_index_settings=[" 

715 f" 'index.lifecycle.name', 'index.lifecycle.rollover_alias'," 

716 f" 'index.routing.allocation.include._tier_preference']," 

717 f"index_settings={index_settings}," 

718 f"rename_pattern={re_pattern}," 

719 f"rename_replacement={replacement}," 

720 f"wait_for_completion=False" 

721 ) 

722 logger.debug('RESTORE settings: %s', msg) 

723 try: 

724 response = client.snapshot.restore( 

725 repository=repo_name, 

726 snapshot=snap_name, 

727 indices=index_name, 

728 include_aliases=False, 

729 ignore_index_settings=[ 

730 'index.lifecycle.name', 

731 'index.lifecycle.rollover_alias', 

732 'index.routing.allocation.include._tier_preference', 

733 ], 

734 index_settings=index_settings, # type: ignore 

735 rename_pattern=re_pattern, 

736 rename_replacement=replacement, 

737 wait_for_completion=False, 

738 ) 

739 logger.debug('Response = %s', response) 

740 logger.info('Checking if restoration completed...') 

741 # restore_check = Restore( 

742 # client, pause=PAUSE_VALUE, timeout=TIMEOUT_VALUE, index_list=[replacement] 

743 # ) 

744 try: 

745 es_waiter(client, Restore, index_list=[replacement], **WAITKW) 

746 except BadClientResult as exc: 

747 logger.error('Exception: %s', exc) 

748 raise FatalError('Failed to restore index from snapshot', exc) 

749 msg = f'Restoration of index {index_name} as {replacement} complete' 

750 logger.info(msg) 

751 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

752 msg = ( 

753 f'Restoration of index {index_name} as {replacement} yielded an error: ' 

754 f'{err}' 

755 ) 

756 logger.error(msg) 

757 raise BadClientResult(msg, err) 

758 

759 

760def redact_from_index(client: 'Elasticsearch', index_name: str, config: t.Dict) -> None: 

761 """Redact data from an index using a painless script. 

762 

763 Collect the task_id and wait for the reinding job to complete before returning 

764 

765 :param client: A client connection object 

766 :param index_name: The index to act on 

767 :param config: The config block being iterated. Contains ``query``, ``message``, 

768 and ``fields`` 

769 

770 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

771 :type index_name: str 

772 :type config: dict 

773 """ 

774 logger.debug('Begin redaction...') 

775 logger.info('Before update by query, %s', report_segment_count(client, index_name)) 

776 logger.debug('Updating and redacting data...') 

777 script = build_script(config['message'], config['fields']) 

778 response = {} 

779 try: 

780 response = dict( 

781 client.update_by_query( 

782 index=index_name, 

783 script=script, 

784 query=config['query'], 

785 wait_for_completion=False, 

786 expand_wildcards=['open', 'hidden'], 

787 ) 

788 ) 

789 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

790 logger.critical('update_by_query yielded an error: %s', err) 

791 raise FatalError('update_by_query API call failed', err) 

792 logger.debug('Checking update by query status...') 

793 logger.debug('response = %s', response) 

794 # task_check = Task(client, action='update_by_query', task_id=response['task']) 

795 try: 

796 # task_check.wait() 

797 es_waiter( 

798 client, Task, action='update_by_query', task_id=response['task'], **WAITKW 

799 ) 

800 except BadClientResult as exc: 

801 logger.error('Exception: %s', exc) 

802 raise FatalError('Failed to complete update by query', exc) 

803 logger.info('After update by query, %s', report_segment_count(client, index_name)) 

804 logger.debug('Update by query completed.') 

805 

806 

807def remove_ilm_policy(client: 'Elasticsearch', index: str) -> t.Dict: 

808 """Remove any ILM policy associated with index 

809 

810 :param client: A client connection object 

811 :param index: The index 

812 

813 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

814 :type index: str 

815 

816 :returns: The response, e.g. ``{'has_failures': False, 'failed_indexes': []}`` 

817 """ 

818 try: 

819 response = dict(client.ilm.remove_policy(index=index)) 

820 logger.debug(response) 

821 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

822 logger.error("Index: '%s' not found. Error: %s", index, err) 

823 raise MissingIndex(f'Index "{index}" not found', err, index) 

824 return response 

825 

826 

827def take_snapshot( 

828 client: 'Elasticsearch', repo_name: str, snap_name: str, index_name: str 

829) -> None: 

830 """ 

831 Take snapshot of index 

832 

833 :param client: A client connection object 

834 :param repo_name: The repository name 

835 :param snap_name: The snapshot name 

836 :param index_name: The name of the index to snapshot 

837 

838 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

839 :type repo_name: str 

840 :type snap_name: str 

841 :type index_name: str 

842 """ 

843 logger.info('Creating new snapshot...') 

844 response = {} 

845 try: 

846 response = dict( 

847 client.snapshot.create( 

848 repository=repo_name, 

849 snapshot=snap_name, 

850 indices=index_name, 

851 wait_for_completion=False, 

852 ) 

853 ) 

854 logger.debug('Snapshot response: %s', response) 

855 except (ApiError, NotFoundError, TransportError, BadRequestError, KeyError) as err: 

856 msg = f'Creation of snapshot "{snap_name}" resulted in an error: {err}' 

857 logger.critical(msg) 

858 raise BadClientResult(msg, err) 

859 logger.info('Checking on status of snapshot...') 

860 # snapshot_check = Snapshot( 

861 # client, snapshot=snap_name, repository=repo_name, **WAITKW 

862 # ) 

863 

864 try: 

865 # snapshot_check.wait() 

866 es_waiter(client, Snapshot, snapshot=snap_name, repository=repo_name, **WAITKW) 

867 except BadClientResult as exc: 

868 logger.error('Exception: %s', exc) 

869 raise FatalError('Failed to complete index snapshot', exc) 

870 msg = ( 

871 f'{index_name}: Snapshot to repository {repo_name} in snapshot {snap_name} ' 

872 f'succeeded.' 

873 ) 

874 logger.info(msg) 

875 

876 

877def update_doc( 

878 client: 'Elasticsearch', index: str, doc_id: str, doc: t.Dict, routing: int = 0 

879) -> None: 

880 """Upsert a document in ``index`` at ``doc_id`` with the values of ``doc`` 

881 

882 :param client: A client connection object 

883 :param index: The index to write to 

884 :param doc_id: The document doc_id to update 

885 :param doc: The contents of the document 

886 :param routing: Because our tracking doc is using parent/child relationships, we 

887 need to route. We use an integer, but the API calls expect a string, so we 

888 manually cast this value in the API call as one. 

889 

890 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

891 :type index: str 

892 :type doc_id: str 

893 :type doc: dict 

894 :type routing: int 

895 """ 

896 try: 

897 if doc_id: 

898 _ = client.update( 

899 index=index, 

900 id=doc_id, 

901 doc=doc, 

902 doc_as_upsert=True, 

903 routing=str(routing), 

904 refresh=True, 

905 ) 

906 else: 

907 logger.debug('No value for document id. Creating new document.') 

908 _ = client.index( 

909 index=index, document=doc, routing=str(routing), refresh=True 

910 ) 

911 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

912 msg = f'Error updating document: {err.args[0]}' 

913 logger.error(msg) 

914 raise BadClientResult(msg, err) 

915 

916 

917def verify_index(client: 'Elasticsearch', index: str) -> bool: 

918 """Verify the index exists and is an index, not an alias 

919 

920 :param client: A client connection object 

921 :param index: The index to check 

922 

923 :type client: :py:class:`~.elasticsearch.Elasticsearch` 

924 :type index: str 

925 """ 

926 logger.debug('Verifying index: %s', index) 

927 retval = True 

928 response = {} 

929 try: 

930 response = dict( 

931 client.indices.get_settings( 

932 index=index, expand_wildcards=['open', 'hidden'] 

933 ) 

934 ) 

935 except (ApiError, NotFoundError, TransportError, BadRequestError) as err: 

936 logger.error("Index: '%s' not found. Error: %s", index, err) 

937 retval = False 

938 logger.debug(response) 

939 if len(list(response.keys())) > 1: 

940 # We have more than one key, that means we hit an alias 

941 logger.error('Index %s is one member of an alias.', index) 

942 retval = False 

943 elif response and list(response.keys())[0] != index: 

944 # There's a 1 to 1 alias, but it is not the index name 

945 logger.error('Index %s is an alias.', index) 

946 retval = False 

947 return retval
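# A minimal sketch of combining a few of these helpers (illustrative only; the
# connection details, index names, and query below are hypothetical and not taken
# from es_pii_tool itself):
#
#     from elasticsearch8 import Elasticsearch
#     from es_pii_tool.helpers import elastic_api
#
#     client = Elasticsearch(hosts='https://localhost:9200')  # hypothetical cluster
#     if not elastic_api.index_exists(client, 'pii-tool-tracking'):
#         elastic_api.create_index(client, 'pii-tool-tracking')
#     # get_hits() wraps do_search() and returns hits.total.value for the query
#     matches = elastic_api.get_hits(client, 'my-data-*', {'match_all': {}})
#     print(f'{matches} documents matched the query')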