Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/helpers/elastic_api.py: 67%
383 statements
« prev ^ index » next coverage.py v7.5.0, created at 2025-03-17 23:49 -0600
1"""Functions making Elasticsearch API calls"""
3import typing as t
4import time
5import logging
6from elasticsearch8.exceptions import (
7 ApiError,
8 NotFoundError,
9 TransportError,
10 BadRequestError,
11)
12from es_wait import Health, Restore, Snapshot, Task
13from es_pii_tool.exceptions import (
14 BadClientResult,
15 FatalError,
16 MissingDocument,
17 MissingError,
18 MissingIndex,
19 ValueMismatch,
20)
21from es_pii_tool.helpers.utils import build_script, check_fields, es_waiter, timing
23if t.TYPE_CHECKING:
24 from dotmap import DotMap # type: ignore
25 from elasticsearch8 import Elasticsearch
26 from elastic_transport import HeadApiResponse
29logger = logging.getLogger(__name__)
31# pylint: disable=R0913,R0917,W0707
def assign_alias(client: 'Elasticsearch', index_name: str, alias_name: str) -> None:
    """Add ``index_name`` to alias ``alias_name``

    :param client: A client connection object
    :param index_name: The index to assign
    :param alias_name: The alias to add the index to

    :raises BadClientResult: If the alias assignment API call fails
    """
    try:
        result = client.indices.put_alias(index=index_name, name=alias_name)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Attempt to assign index "{index_name}" to alias "{alias_name}" failed'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.info(
        "Index '%s' was successfully added to alias '%s'", index_name, alias_name
    )
    logger.debug(result)
def check_index(client: 'Elasticsearch', index_name: str, job_config: t.Dict) -> None:
    """Spot-check documents in the redacted index against the job's query

    :param client: A client connection object
    :param index_name: The index name
    :param job_config: The job configuration; must contain ``query``

    :raises ValueMismatch: If one or more fields were not redacted
    """
    logger.info('Making a quick check on redacted index docs...')
    search_result = do_search(client, index_name, job_config['query'])
    if not search_result['hits']['total']['value']:
        logger.warning(
            'Query returned no results, assuming it only returns docs '
            'to be redacted and not already redacted...'
        )
        return
    if check_fields(search_result, job_config):
        return
    msg = 'One or more fields were not redacted. Check the logs'
    logger.error(msg)
    raise ValueMismatch(msg, 'count of fields matching query is not 0', '0')
def clear_cache(client: 'Elasticsearch', index_name: str) -> None:
    """Clear the cache for named index

    Errors are logged but not raised: cache clearing is best-effort.

    :param client: A client connection object
    :param index_name: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str

    :returns: No return value
    :rtype: None
    """
    logger.info('Clearing cache data for %s...', index_name)
    try:
        result = client.indices.clear_cache(
            index=index_name, expand_wildcards=['open', 'hidden']
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error('clear_cache API call resulted in an error: %s', err)
    else:
        logger.debug(dict(result))
def close_index(client: 'Elasticsearch', name: str) -> None:
    """Close an index

    :param client: A client connection object
    :param name: The index name to close

    :type name: str

    :raises MissingIndex: If the close API call fails
    """
    try:
        result = client.indices.close(index=name, expand_wildcards=['open', 'hidden'])
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", name, err)
        raise MissingIndex(f'Index "{name}" not found', err, name)
    logger.debug(result)
def create_index(
    client: 'Elasticsearch',
    name: str,
    mappings: t.Union[t.Dict, None] = None,
    settings: t.Union[t.Dict, None] = None,
) -> None:
    """Create an Elasticsearch index with associated mappings and settings

    A no-op if the index already exists.

    :param name: The index name
    :param mappings: The index mappings
    :param settings: The index settings

    :type name: str
    :type mappings: dict
    :type settings: dict

    :raises BadClientResult: If the creation API call fails
    """
    if index_exists(client, name):
        logger.info('Index %s already exists', name)
        return
    try:
        result = client.indices.create(
            index=name, settings=settings, mappings=mappings
        )
    except BadRequestError as err:
        logger.error("Index: '%s' already exists. Error: %s", name, err)
        raise BadClientResult(f'Index "{name}" already exists', err)
    except (ApiError, TransportError) as err:
        logger.error("Unknown error trying to create index: '%s'. Error: %s", name, err)
        raise BadClientResult(f'Unknown error trying to create index: {name}', err)
    logger.debug(result)
def delete_index(client: 'Elasticsearch', name: str) -> None:
    """Delete an index

    :param client: A client connection object
    :param name: The index name to delete

    :type name: str

    :raises MissingIndex: If the delete API call fails for any reason
    """
    try:
        response = client.indices.delete(
            index=name, expand_wildcards=['open', 'hidden']
        )
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        # Any failure (not only a 404) is surfaced as MissingIndex here
        raise MissingIndex(f'Index "{name}" not found', err, name)
def do_search(
    client: 'Elasticsearch', index_pattern: str, query: t.Dict, size: int = 10
) -> t.Dict:
    """Return search result of ``query`` against ``index_pattern``

    :param client: A client connection object
    :param index_pattern: A single index name, a csv list of indices, or other pattern
    :param query: An Elasticsearch DSL search query
    :param size: Maximum number of results to return

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_pattern: str
    :type query: dict
    :type size: int

    :raises BadClientResult: If the search API call fails
    """
    kwargs = {
        'index': index_pattern,
        'query': query,
        'size': size,
        'expand_wildcards': ['open', 'hidden'],
    }
    logger.debug('Search kwargs = %s', kwargs)
    try:
        result = dict(client.search(**kwargs))  # type: ignore
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Attempt to collect search results yielded an exception: {err}'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.debug(result)
    return result
def forcemerge_index(
    client: 'Elasticsearch',
    index: t.Union[str, None] = None,
    max_num_segments: int = 1,
    only_expunge_deletes: bool = False,
) -> None:
    """
    Force Merge an index, then block until the forcemerge task completes.

    :param client: A client connection object
    :param index: A single index name
    :param max_num_segments: The maximum number of segments per shard after a
        force merge
    :param only_expunge_deletes: Only expunge deleted docs during force merging.
        If True, ignores max_num_segments.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type max_num_segments: int
    :type only_expunge_deletes: bool

    :raises MissingIndex: If the forcemerge API call fails
    :raises FatalError: If waiting for task completion fails
    """
    # wait_for_completion=False: we poll the returned task id ourselves below
    kwargs = {'index': index, 'wait_for_completion': False}
    # The API treats these two options as mutually exclusive; send only one
    if only_expunge_deletes:
        kwargs.update({'only_expunge_deletes': only_expunge_deletes})
    else:
        kwargs.update({'max_num_segments': max_num_segments})  # type: ignore
    try:
        response = dict(client.indices.forcemerge(**kwargs))  # type: ignore
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)  # type: ignore
    logger.info('Waiting for forcemerge to complete...')
    # pause/timeout values come from the environment (see utils.timing)
    pause, timeout = timing('task')
    logger.debug(f'ENV pause = {pause}, timeout = {timeout}')
    try:
        # Poll the task API until the forcemerge task reports completion
        es_waiter(
            client,
            Task,
            action='forcemerge',
            task_id=response['task'],
            pause=pause,
            timeout=timeout,
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to forcemerge', exc)
    logger.info('Forcemerge completed.')
def generic_get(func: t.Callable, **kwargs) -> t.Dict:
    """Generic, reusable client request getter

    Call ``func(**kwargs)`` and return the response cast to a dict.

    :param func: A client API getter method
    :param kwargs: Keyword arguments passed straight through to ``func``

    :raises MissingError: If the call raises NotFoundError
    :raises BadClientResult: On any other API or transport error
    """
    try:
        result = dict(func(**kwargs))
    except NotFoundError as nferr:
        raise MissingError('Generic Get MissingError', nferr, nferr.info)
    except (ApiError, TransportError, BadRequestError) as err:
        raise BadClientResult('Generic Get BadClientResult Failure', err)
    logger.debug(result)
    return result
def get_hits(client: 'Elasticsearch', index: str, query: t.Dict) -> int:
    """Return the number of hits matching the query

    :param client: A client connection object
    :param index: The index or pattern to search
    :param query: The query to execute

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type query: dict

    :returns: The number of hits matching the query
    """
    return do_search(client, index, query)['hits']['total']['value']
def get_ilm(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the ILM lifecycle explanation for an index

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The ILM explain object for the named index
    :raises MissingIndex: If the index is not found
    """
    try:
        result = dict(client.ilm.explain_lifecycle(index=index))
    except NotFoundError as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(result)
    return result
def get_ilm_lifecycle(client: 'Elasticsearch', policyname: str) -> t.Dict:
    """Get the ILM lifecycle settings for ``policyname``

    :param client: A client connection object
    :param policyname: The ILM policy name to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type policyname: str

    :returns: The ILM settings object for the named policy, or an empty dict
        when the policy does not exist
    """
    try:
        return dict(client.ilm.get_lifecycle(name=policyname))
    except NotFoundError:
        logger.debug("ILM policy '%s' not found.", policyname)
        return {}
def get_index(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the info about an index

    :param client: A client connection object
    :param index: The index, csv indices, or index pattern to get

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The index information object for the named index
    :raises MissingIndex: If the index is not found
    """
    try:
        result = dict(
            client.indices.get(index=index, expand_wildcards=['open', 'hidden'])
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug('Found indices: %s', list(result.keys()))
    return result
def get_phase(client: 'Elasticsearch', index: str) -> t.Union[str, None]:
    """Get the index's ILM phase

    :param client: A client connection object
    :param index: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The ILM phase of ``index``, or None if it cannot be determined
    """
    ilm = get_ilm(client, index)
    try:
        return ilm['indices'][index]['phase']
    except KeyError:
        # Perhaps in cold/frozen but not ILM affiliated
        settings = get_settings(client, index)[index]['settings']['index']
        # A mounted searchable snapshot has store.type == 'snapshot'
        if 'store' in settings and settings['store']['type'] == 'snapshot':
            return get_phase_from_tier_pref(settings)
        return None
def get_phase_from_tier_pref(
    idx_settings: t.Dict,
) -> t.Union[t.Literal['frozen', 'cold'], None]:
    """
    Check the index's ``_tier_preference`` as an indicator which phase the index is in

    :param idx_settings: The results from a
        get_settings(index=idx)[idx]['settings']['index'] call

    :returns: ``'frozen'`` or ``'cold'`` based on the tier preference, else None
    """
    try:
        tiers = idx_settings['routing']['allocation']['include']['_tier_preference']
    except KeyError:
        # No tier-preference routing at all: nothing to infer
        return None
    if tiers == 'data_frozen':
        return 'frozen'
    return 'cold' if 'data_cold' in tiers.split(',') else None
def ilm_move(
    client: 'Elasticsearch', name: str, current_step: t.Dict, next_step: t.Dict
) -> None:
    """Move index ``name`` from the current step to the next step

    :param client: A client connection object
    :param name: The index name
    :param current_step: The step the index is currently in
    :param next_step: The step to move the index to

    :raises BadClientResult: If the ILM move_to_step call fails
    """
    try:
        client.ilm.move_to_step(
            index=name, current_step=current_step, next_step=next_step
        )
    # Narrowed from a bare `except Exception` to the transport/API errors the
    # rest of this module catches, so unrelated bugs are not masked
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = (
            f'Unable to move index {name} to ILM next step: {next_step}. '
            f'Error: {err}'
        )
        logger.critical(msg)
        raise BadClientResult(msg, err)
def modify_data_stream(
    client: 'Elasticsearch', actions: t.Sequence[t.Mapping[str, t.Any]]
) -> None:
    """Modify a data_stream using the contents of ``actions``

    :param client: A client connection object
    :param actions: The actions to take

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type actions: dict

    :raises MissingIndex: If the data_stream or a backing index is missing
    """
    try:
        client.indices.modify_data_stream(actions=actions)
    except BadRequestError as exc:
        logger.error(
            "Unable to modify data_stream using actions='%s'. ERROR: %s", actions, exc
        )
        raise MissingIndex(
            'Missing either data_stream or index', exc, f'actions: {actions}'
        )
def report_segment_count(client: 'Elasticsearch', index: str) -> str:
    """
    Report the count of segments from index

    Replica shards are skipped; only primaries are counted.

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: Formatted message describing shard count and segment count for index
    :raises BadClientResult: If the _cat shards API call fails
    """
    shardcount = 0
    segmentcount = 0
    try:
        output = client.cat.shards(
            index=index, format='json', h=['index', 'shard', 'prirep', 'sc']
        )
    except Exception as exc:
        logger.error('Exception: %s', exc)
        raise BadClientResult('Unable to get cat shards output', exc)
    for shard in output:
        if shard['prirep'] == 'r':  # type: ignore
            # Skip replica shards
            continue
        if index != shard['index']:  # type: ignore
            logger.warning(
                'Index name %s does not match what was returned by the _cat API: %s',
                index,
                shard['index'],  # type: ignore
            )
        shardcount += 1
        segmentcount += int(shard['sc'])  # type: ignore
        logger.debug(
            'Index %s, shard %s has %s segments',
            index,
            shard["shard"],  # type: ignore
            shard["sc"],  # type: ignore
        )
    if shardcount == 0:
        # Guard the average: an index with no primary shards previously
        # raised ZeroDivisionError here
        return f'index {index} has 0 shards and a total of 0 segments'
    return (
        f'index {index} has {shardcount} shards and a total of {segmentcount} '
        f'segments, averaging {segmentcount / shardcount} segments per shard'
    )
def get_settings(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the settings for an index

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The settings object for the named index
    :raises MissingIndex: If the index is not found
    """
    logger.debug('Getting settings for index: %s', index)
    try:
        result = dict(
            client.indices.get_settings(
                index=index, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(result)
    logger.debug('Index settings collected.')
    return result
def put_settings(client: 'Elasticsearch', index: str, settings: dict) -> None:
    """Apply ``settings`` to ``index``

    (Docstring corrected: it previously described modify_data_stream.)

    :param client: A client connection object
    :param index: The index to apply the settings to
    :param settings: The index settings to apply

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type settings: dict

    :raises MissingIndex: If the index is not found
    :raises BadClientResult: If the settings are rejected by the API
    """
    try:
        client.indices.put_settings(index=index, settings=settings)
    except NotFoundError as exc:
        logger.error("Index '%s' not found: %s", index, exc)
        raise MissingIndex('Index not found', exc, index)
    except BadRequestError as exc:
        logger.error("Bad settings: %s. ERROR: %s", settings, exc)
        raise BadClientResult(f'Invalid settings: {settings}', exc)
def get_progress_doc(
    client: 'Elasticsearch',
    index_name: str,
    job_id: str,
    task_id: str,
    stepname: str = '',
) -> t.Dict:
    """Get a task tracking doc

    Searches the tracking index for the single progress document belonging to
    a task (or, when ``stepname`` is given, to a step within that task). The
    document is expected to be a child of the job document (parent_id query).

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job name string for the present redaction run
    :param task_id: The task_id string of the task we are searching for
    :param stepname: [Optional] The step name string of the step we are searching for

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str
    :type task_id: str
    :type stepname: str

    :returns: The progress tracking document from the progress/status tracking index
        for the task or step

    :raises MissingIndex: If the tracking index does not exist
    :raises FatalError: If more than one matching document is found
    :raises MissingDocument: If no matching document is found
    """
    # Base value for stub (task)
    stub = f'Task: {task_id} of Job: {job_id}'
    # The proto query: children of the job document; filters added below
    query = {
        "bool": {
            "must": {"parent_id": {"type": "task", "id": job_id}},
            "filter": [],
        }
    }
    # The base value of the bool filter (task)
    filters = [
        {"term": {"task": task_id}},
        {"term": {"job": job_id}},
    ]
    if not stepname:
        logger.info('Tracking progress for %s', stub)
        # For Tasks progress docs, we must not match docs with a step field
        query['bool']['must_not'] = {"exists": {"field": "step"}}
    else:
        # Update stub to be for a step
        stub = f'Step: {stepname} of Task: {task_id} of Job: {job_id}'
        logger.info('Tracking progress for %s', stub)
        # Update filters to include step
        filters.append({"term": {"step": stepname}})
    # Add the filters to the query
    query['bool']['filter'] = filters  # type: ignore
    try:
        result = do_search(client, index_pattern=index_name, query=query)
    except NotFoundError as err:
        msg = f'Tracking index {index_name} is missing'
        logger.critical(msg)
        raise MissingIndex(msg, err, index_name)
    # First get the edge case of multiple hits out of the way
    if result['hits']['total']['value'] > 1:
        msg = f'Tracking document for {stub} is not unique. This should never happen.'
        logger.critical(msg)
        raise FatalError(msg, ValueError())
    # After the > 1 test, if we don't have exactly 1 hit, we have zero hits
    if result['hits']['total']['value'] != 1:
        msg = f'Tracking document for {stub} does not exist'
        missing = f'A document with step: {stepname}, task: {task_id}, job: {job_id}'
        logger.debug(msg)
        raise MissingDocument(msg, Exception(), missing)
    # There can be only one...
    return result['hits']['hits'][0]
def get_tracking_doc(client: 'Elasticsearch', index_name: str, job_id: str) -> t.Dict:
    """Get the progress/status tracking doc for the provided job_id

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job_id string for the present redaction run

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str

    :returns: The tracking document from the progress/status tracking index
    :raises MissingIndex: If the tracking index does not exist
    :raises MissingDocument: If no document exists for ``job_id``
    """
    if not index_exists(client, index_name):
        msg = f'Tracking index {index_name} is missing'
        logger.critical(msg)
        raise MissingIndex(msg, Exception(), index_name)
    try:
        response = dict(client.get(index=index_name, id=job_id))
    except NotFoundError as exc:
        msg = f'Tracking document for job_id {job_id} does not exist'
        logger.debug(msg)
        raise MissingDocument(msg, exc, job_id)
    return response['_source']
def index_exists(client: 'Elasticsearch', index_name: str) -> 'HeadApiResponse':
    """Test whether index ``index_name`` exists

    :param client: A client connection object
    :param index_name: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str

    :returns: ``HeadApiResponse(True)`` if ``index_name`` exists, otherwise
        ``HeadApiResponse(False)``
    """
    return client.indices.exists(
        index=index_name, expand_wildcards=['open', 'hidden']
    )
def job_exists(
    client: 'Elasticsearch', index_name: str, job_id: str
) -> 'HeadApiResponse':
    """Test whether a document exists for the present job_id

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job_id string for the present redaction run

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str

    :returns: ``HeadApiResponse(True)`` if a document with id ``job_id``
        exists in ``index_name``, otherwise ``HeadApiResponse(False)``
    """
    return client.exists(index=index_name, id=job_id)
def mount_index(var: 'DotMap') -> None:
    """Mount index as a searchable snapshot

    Deletes any pre-existing index with the target mount name, mounts the
    snapshot, then waits for the mounted index to reach "green" health.

    :param var: A collection of variables from
        :py:attr:`~.es_pii_tool.redacters.snapshot.RedactSnapshot.var`

    :type var: DotMap

    :raises BadClientResult: If the mount API call fails
    :raises FatalError: If the mounted index never reaches "green" health
    """
    response = {}
    msg = (
        f'Mounting {var.redaction_target} renamed as {var.mount_name} '
        f'from repository: {var.repository}, snapshot: {var.new_snap_name} '
        f'with storage={var.storage}'
    )
    logger.debug(msg)
    # The mount target must not already exist; loop until deletion sticks
    while index_exists(var.client, var.mount_name):
        logger.warning('Index %s exists. Deleting before remounting', var.mount_name)
        delete_index(var.client, var.mount_name)
        # Brief pause to let the deletion propagate before re-checking
        time.sleep(3.0)
    try:
        response = dict(
            var.client.searchable_snapshots.mount(
                repository=var.repository,
                snapshot=var.new_snap_name,
                index=var.redaction_target,
                renamed_index=var.mount_name,
                storage=var.storage,
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Attempt to mount index '%s' failed: %s", var.mount_name, err)
        logger.debug(response)
        raise BadClientResult('Error when mount index attempted', err)
    logger.info('Ensuring searchable snapshot mount is in "green" health state...')
    # pause/timeout values come from the environment (see utils.timing)
    pause, timeout = timing('health')
    logger.debug(f'ENV pause = {pause}, timeout = {timeout}')
    try:
        # Block until cluster health for the mounted index reports "green"
        es_waiter(
            var.client,
            Health,
            check_type='status',
            indices=var.mount_name,
            pause=pause,
            timeout=timeout,
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to mount index from snapshot', exc)
    logger.info("Index '%s' mounted from snapshot succesfully", var.mount_name)
def resolve_index(client: 'Elasticsearch', index: str) -> t.Dict:
    """Resolve an index

    :param client: A client connection object
    :param index: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The return value from
        :py:meth:`~.elasticsearch.Elasticsearch.IndicesClient.resolve_index`
    :rtype: dict
    :raises MissingIndex: If the index is not found
    """
    logger.debug('Resolving index: %s', index)
    try:
        result = dict(
            client.indices.resolve_index(
                name=index, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(result)
    logger.debug('Index resolved.')
    return result
def restore_index(
    client: 'Elasticsearch',
    repo_name: str,
    snap_name: str,
    index_name: str,
    replacement: str,
    re_pattern: str = '(.+)',
    index_settings: t.Union[str, None] = None,
) -> None:
    """Restore an index, wait for completion, then verify its health.

    :param client: A client connection object
    :param repo_name: The repository name
    :param snap_name: The snapshot name
    :param index_name: The index name as it appears in the snapshot metadata
    :param replacement: The name or substitution string to use as the restored index
        name
    :param re_pattern: The optional rename pattern for use with ``replacement``
    :param index_settings: Any settings to apply to the restored index, such as
        _tier_preference

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repo_name: str
    :type snap_name: str
    :type index_name: str
    :type replacement: str
    :type re_pattern: str
    :type index_settings: dict

    :raises BadClientResult: If the restore API call or the completion wait fails
    :raises ValueMismatch: If the restored index ends up in "red" health
    """
    # Mirror the exact restore arguments in the debug log for troubleshooting
    msg = (
        f"repository={repo_name}, snapshot={snap_name}, indices={index_name},"
        f"include_aliases=False,"
        f"ignore_index_settings=["
        f" 'index.lifecycle.name', 'index.lifecycle.rollover_alias',"
        f" 'index.routing.allocation.include._tier_preference'],"
        f"index_settings={index_settings},"
        f"rename_pattern={re_pattern},"
        f"rename_replacement={replacement},"
        f"wait_for_completion=False"
    )
    logger.debug('RESTORE settings: %s', msg)
    try:
        # ILM and tier-preference settings are ignored so the restored copy
        # does not immediately re-enter lifecycle management
        response = client.snapshot.restore(
            repository=repo_name,
            snapshot=snap_name,
            indices=index_name,
            include_aliases=False,
            ignore_index_settings=[
                'index.lifecycle.name',
                'index.lifecycle.rollover_alias',
                'index.routing.allocation.include._tier_preference',
            ],
            index_settings=index_settings,  # type: ignore
            rename_pattern=re_pattern,
            rename_replacement=replacement,
            wait_for_completion=False,
        )
        logger.debug('Response = %s', response)
        logger.info('Checking if restoration completed...')
        # pause/timeout values come from the environment (see utils.timing)
        pause, timeout = timing('restore')
        logger.debug(f'ENV pause = {pause}, timeout = {timeout}')
        try:
            # Block until the renamed index finishes restoring
            es_waiter(
                client, Restore, index_list=[replacement], pause=pause, timeout=timeout
            )
        except BadClientResult as bad:
            logger.error('Exception: %s', bad)
            raise BadClientResult('Failed to restore index from snapshot', bad)
        msg = f'Restoration of index {index_name} as {replacement} complete'
        logger.info(msg)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = (
            f'Restoration of index {index_name} as {replacement} yielded an error: '
            f'{err}'
        )
        logger.error(msg)
        raise BadClientResult(msg, err)
    # verify index is green (yellow is tolerated; only red is fatal)
    logger.info('Ensuring restored index is in "green" health state...')
    res = dict(client.cluster.health(index=replacement, filter_path='status'))
    logger.debug('res = %s', res)
    if res['status'] == 'red':
        msg = f'Restored index {replacement} is not in a healthy state'
        logger.error(msg)
        raise ValueMismatch(msg, 'index health is "red"', 'green or yellow')
def redact_from_index(client: 'Elasticsearch', index_name: str, config: t.Dict) -> None:
    """Redact data from an index using a painless script.

    Collect the task_id and wait for the update-by-query job to complete
    before returning.

    :param client: A client connection object
    :param index_name: The index to act on
    :param config: The config block being iterated. Contains ``query``, ``message``,
        and ``fields``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type config: dict

    :raises FatalError: If the update_by_query call or the completion wait fails
    """
    logger.debug('Begin redaction...')
    logger.info('Before update by query, %s', report_segment_count(client, index_name))
    logger.debug('Updating and redacting data...')
    # Build the painless script replacing `fields` with `message`
    script = build_script(config['message'], config['fields'])
    response = {}
    try:
        # wait_for_completion=False: we poll the returned task id ourselves
        response = dict(
            client.update_by_query(
                index=index_name,
                script=script,
                query=config['query'],
                wait_for_completion=False,
                expand_wildcards=['open', 'hidden'],
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.critical('update_by_query yielded an error: %s', err)
        raise FatalError('update_by_query API call failed', err)
    logger.debug('Checking update by query status...')
    logger.debug('response = %s', response)
    # pause/timeout values come from the environment (see utils.timing)
    pause, timeout = timing('task')
    logger.debug(f'ENV pause = {pause}, timeout = {timeout}')
    try:
        # Poll the task API until the update_by_query task completes
        es_waiter(
            client,
            Task,
            action='update_by_query',
            task_id=response['task'],
            pause=pause,
            timeout=timeout,
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to complete update by query', exc)
    logger.info('After update by query, %s', report_segment_count(client, index_name))
    logger.debug('Update by query completed.')
def remove_ilm_policy(client: 'Elasticsearch', index: str) -> t.Dict:
    """Remove any ILM policy associated with index

    :param client: A client connection object
    :param index: The index

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The response, e.g. ``{'has_failures': False, 'failed_indexes': []}``
    :raises MissingIndex: If the index is not found
    """
    try:
        result = dict(client.ilm.remove_policy(index=index))
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(result)
    return result
def take_snapshot(
    client: 'Elasticsearch', repo_name: str, snap_name: str, index_name: str
) -> None:
    """
    Take snapshot of index and wait for it to complete.

    :param client: A client connection object
    :param repo_name: The repository name
    :param snap_name: The snapshot name
    :param index_name: The name of the index to snapshot

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repo_name: str
    :type snap_name: str
    :type index_name: str

    :raises BadClientResult: If the snapshot create API call fails
    :raises FatalError: If waiting for snapshot completion fails
    """
    logger.info('Creating new snapshot...')
    response = {}
    try:
        # wait_for_completion=False: we poll the snapshot status ourselves
        response = dict(
            client.snapshot.create(
                repository=repo_name,
                snapshot=snap_name,
                indices=index_name,
                wait_for_completion=False,
            )
        )
        logger.debug('Snapshot response: %s', response)
    except (ApiError, NotFoundError, TransportError, BadRequestError, KeyError) as err:
        msg = f'Creation of snapshot "{snap_name}" resulted in an error: {err}'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.info('Checking on status of snapshot...')
    # pause/timeout values come from the environment (see utils.timing)
    pause, timeout = timing('snapshot')
    logger.debug(f'ENV pause = {pause}, timeout = {timeout}')
    try:
        # Block until the snapshot reports a completed state
        es_waiter(
            client,
            Snapshot,
            snapshot=snap_name,
            repository=repo_name,
            pause=pause,
            timeout=timeout,
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to complete index snapshot', exc)
    msg = (
        f'{index_name}: Snapshot to repository {repo_name} in snapshot {snap_name} '
        f'succeeded.'
    )
    logger.info(msg)
def update_doc(
    client: 'Elasticsearch', index: str, doc_id: str, doc: t.Dict, routing: int = 0
) -> None:
    """Upsert a document in ``index`` at ``doc_id`` with the values of ``doc``

    :param client: A client connection object
    :param index: The index to write to
    :param doc_id: The document doc_id to update
    :param doc: The contents of the document
    :param routing: Because our tracking doc is using parent/child relationships, we
        need to route. We use an integer, but the API calls expect a string, so we
        manually cast this value in the API call as one.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type doc_id: str
    :type doc: dict
    :type routing: int

    :raises BadClientResult: If the update/index API call fails
    """
    try:
        if not doc_id:
            # No id means there is nothing to upsert against: create instead
            logger.debug('No value for document id. Creating new document.')
            _ = client.index(
                index=index, document=doc, routing=str(routing), refresh=True
            )
        else:
            _ = client.update(
                index=index,
                id=doc_id,
                doc=doc,
                doc_as_upsert=True,
                routing=str(routing),
                refresh=True,
            )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Error updating document: {err.args[0]}'
        logger.error(msg)
        raise BadClientResult(msg, err)
def verify_index(client: 'Elasticsearch', index: str) -> bool:
    """Verify the index exists and is an index, not an alias

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: ``True`` if ``index`` resolves to exactly one concrete index of
        the same name, otherwise ``False``
    """
    logger.debug('Verifying index: %s', index)
    response = {}
    try:
        response = dict(
            client.indices.get_settings(
                index=index, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
    logger.debug(response)
    if not response:
        # Lookup failed or matched nothing. Previously this fell through to
        # list(response.keys())[0] and raised IndexError on an empty dict.
        return False
    keys = list(response.keys())
    if len(keys) > 1:
        # We have more than one key, that means we hit an alias
        logger.error('Index %s is one member of an alias.', index)
        return False
    if keys[0] != index:
        # There's a 1 to 1 alias, but it is not the index name
        logger.error('Index %s is an alias.', index)
        return False
    return True