Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/helpers/elastic_api.py: 66%
378 statements
« prev ^ index » next coverage.py v7.5.0, created at 2025-01-29 19:53 -0700
1"""Functions making Elasticsearch API calls"""
3from os import getenv
4import typing as t
5import time
6import logging
7from elasticsearch8.exceptions import (
8 ApiError,
9 NotFoundError,
10 TransportError,
11 BadRequestError,
12)
13from es_wait import Index, Restore, Snapshot, Task
14from es_pii_tool.defaults import (
15 PAUSE_DEFAULT,
16 PAUSE_ENVVAR,
17 TIMEOUT_DEFAULT,
18 TIMEOUT_ENVVAR,
19)
20from es_pii_tool.exceptions import (
21 BadClientResult,
22 FatalError,
23 MissingDocument,
24 MissingError,
25 MissingIndex,
26 ValueMismatch,
27)
28from es_pii_tool.helpers.utils import build_script, check_fields, es_waiter
30if t.TYPE_CHECKING:
31 from dotmap import DotMap # type: ignore
32 from elasticsearch8 import Elasticsearch
33 from elastic_transport import HeadApiResponse
# Wait-loop pause/timeout defaults, overridable via environment variables
PAUSE_VALUE = float(getenv(PAUSE_ENVVAR, default=PAUSE_DEFAULT))
TIMEOUT_VALUE = float(getenv(TIMEOUT_ENVVAR, default=TIMEOUT_DEFAULT))
# Common keyword arguments passed to es_waiter() calls throughout this module
WAITKW = {'pause': PAUSE_VALUE, 'timeout': TIMEOUT_VALUE}

logger = logging.getLogger(__name__)

# pylint: disable=R0913,W0707
def assign_alias(client: 'Elasticsearch', index_name: str, alias_name: str) -> None:
    """Add ``index_name`` to alias ``alias_name``

    :param client: A client connection object
    :param index_name: The index name
    :param alias_name: The alias name

    :raises BadClientResult: If the alias assignment fails for any reason
    """
    try:
        result = client.indices.put_alias(index=index_name, name=alias_name)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Attempt to assign index "{index_name}" to alias "{alias_name}" failed'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.info(
        "Index '%s' was successfully added to alias '%s'", index_name, alias_name
    )
    logger.debug(result)
def check_index(client: 'Elasticsearch', index_name: str, job_config: t.Dict) -> None:
    """Spot-check that docs matching the job's query were actually redacted

    :param client: A client connection object
    :param index_name: The index name
    :param job_config: The job configuration; must contain a ``query`` key

    :raises ValueMismatch: If one or more fields were not redacted
    """
    logger.info('Making a quick check on redacted index docs...')
    search_result = do_search(client, index_name, job_config['query'])
    if not search_result['hits']['total']['value']:
        logger.warning(
            'Query returned no results, assuming it only returns docs '
            'to be redacted and not already redacted...'
        )
        return
    if check_fields(search_result, job_config):
        return
    msg = 'One or more fields were not redacted. Check the logs'
    logger.error(msg)
    raise ValueMismatch(msg, 'count of fields matching query is not 0', '0')
def clear_cache(client: 'Elasticsearch', index_name: str) -> None:
    """Clear the cache for named index

    Failure is logged but never raised: a failed cache clear is non-fatal.

    :param client: A client connection object
    :param index_name: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str

    :returns: No return value
    :rtype: None
    """
    logger.info('Clearing cache data for %s...', index_name)
    try:
        result = dict(
            client.indices.clear_cache(
                index=index_name, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error('clear_cache API call resulted in an error: %s', err)
    else:
        logger.debug(result)
def close_index(client: 'Elasticsearch', name: str) -> None:
    """Close an index

    :param name: The index name to close

    :type name: str

    :raises MissingIndex: If the close call fails (index not found)
    """
    try:
        logger.debug(
            client.indices.close(index=name, expand_wildcards=['open', 'hidden'])
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", name, err)
        raise MissingIndex(f'Index "{name}" not found', err, name)
def create_index(
    client: 'Elasticsearch',
    name: str,
    mappings: t.Union[t.Dict, None] = None,
    settings: t.Union[t.Dict, None] = None,
) -> None:
    """Create an Elasticsearch index with associated mappings and settings

    A no-op when the index already exists.

    :param name: The index name
    :param mappings: The index mappings
    :param settings: The index settings

    :type name: str
    :type mappings: dict
    :type settings: dict

    :raises BadClientResult: If the create call fails
    """
    if index_exists(client, name):
        logger.info('Index %s already exists', name)
        return
    try:
        result = client.indices.create(index=name, settings=settings, mappings=mappings)
    except BadRequestError as err:
        logger.error("Index: '%s' already exists. Error: %s", name, err)
        raise BadClientResult(f'Index "{name}" already exists', err)
    except (ApiError, TransportError) as err:
        logger.error("Unknown error trying to create index: '%s'. Error: %s", name, err)
        raise BadClientResult(f'Unknown error trying to create index: {name}', err)
    logger.debug(result)
def delete_index(client: 'Elasticsearch', name: str) -> None:
    """Delete an index

    :param client: A client connection object
    :param name: The index name to delete

    :type name: str

    :raises MissingIndex: If the delete call fails (index not found)
    """
    try:
        result = client.indices.delete(index=name, expand_wildcards=['open', 'hidden'])
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        raise MissingIndex(f'Index "{name}" not found', err, name)
    logger.debug(result)
def do_search(
    client: 'Elasticsearch', index_pattern: str, query: t.Dict, size: int = 10
) -> t.Dict:
    """Return search result of ``query`` against ``index_pattern``

    :param client: A client connection object
    :param index_pattern: A single index name, a csv list of indices, or other pattern
    :param query: An Elasticsearch DSL search query
    :param size: Maximum number of results to return

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_pattern: str
    :type query: dict
    :type size: int

    :raises BadClientResult: If the search call fails for any reason
    """
    search_args = {
        'index': index_pattern,
        'query': query,
        'size': size,
        'expand_wildcards': ['open', 'hidden'],
    }
    logger.debug('Search kwargs = %s', search_args)
    try:
        result = dict(client.search(**search_args))  # type: ignore
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Attempt to collect search results yielded an exception: {err}'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.debug(result)
    return result
def forcemerge_index(
    client: 'Elasticsearch',
    index: t.Union[str, None] = None,
    max_num_segments: int = 1,
    only_expunge_deletes: bool = False,
) -> None:
    """
    Force Merge an index and wait for the resulting task to complete

    :param client: A client connection object
    :param index: A single index name
    :param max_num_segments: The maximum number of segments per shard after a
        force merge
    :param only_expunge_deletes: Only expunge deleted docs during force merging.
        If True, ignores max_num_segments.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type max_num_segments: int
    :type only_expunge_deletes: bool

    :raises MissingIndex: If the forcemerge call fails
    :raises FatalError: If waiting for the forcemerge task fails
    """
    merge_kwargs: t.Dict = {'index': index, 'wait_for_completion': False}
    # only_expunge_deletes and max_num_segments are mutually exclusive
    if only_expunge_deletes:
        merge_kwargs['only_expunge_deletes'] = only_expunge_deletes
    else:
        merge_kwargs['max_num_segments'] = max_num_segments
    try:
        response = dict(client.indices.forcemerge(**merge_kwargs))
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)  # type: ignore
    logger.info('Waiting for forcemerge to complete...')
    try:
        es_waiter(client, Task, action='forcemerge', task_id=response['task'], **WAITKW)
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to forcemerge', exc)
    logger.info('Forcemerge completed.')
def generic_get(func: t.Callable, **kwargs) -> t.Dict:
    """Generic, reusable client request getter

    :param func: The client API callable to invoke
    :param kwargs: Keyword arguments forwarded to ``func``

    :raises MissingError: On a 404 from the client
    :raises BadClientResult: On any other client error
    """
    try:
        result = dict(func(**kwargs))
    except NotFoundError as nferr:
        raise MissingError('Generic Get MissingError', nferr, nferr.info)
    except (ApiError, TransportError, BadRequestError) as err:
        raise BadClientResult('Generic Get BadClientResult Failure', err)
    logger.debug(result)
    return result
def get_hits(client: 'Elasticsearch', index: str, query: t.Dict) -> int:
    """Return the number of hits matching the query

    :param client: A client connection object
    :param index: The index or pattern to search
    :param query: The query to execute

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type query: dict

    :returns: The number of hits matching the query
    """
    return do_search(client, index, query)['hits']['total']['value']
def get_ilm(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the ILM lifecycle explanation for an index

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The ILM settings object for the named index
    :raises MissingIndex: If the index does not exist
    """
    try:
        lifecycle = dict(client.ilm.explain_lifecycle(index=index))
    except NotFoundError as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(lifecycle)
    return lifecycle
def get_ilm_lifecycle(client: 'Elasticsearch', policyname: str) -> t.Dict:
    """Get the ILM lifecycle settings for ``policyname``

    :param client: A client connection object
    :param policyname: The ILM policy name to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type policyname: str

    :returns: The ILM settings object for the named policy, or an empty dict
        when the policy does not exist
    """
    try:
        return dict(client.ilm.get_lifecycle(name=policyname))
    except NotFoundError:
        logger.debug("ILM policy '%s' not found.", policyname)
        return {}
def get_index(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the info about an index

    :param client: A client connection object
    :param index: The index, csv indices, or index pattern to get

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The index information object for the named index
    :raises MissingIndex: If the index does not exist
    """
    try:
        result = dict(
            client.indices.get(index=index, expand_wildcards=['open', 'hidden'])
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug('Found indices: %s', list(result.keys()))
    return result
def get_phase(client: 'Elasticsearch', index: str) -> t.Union[str, None]:
    """Get the index's ILM phase

    :param client: A client connection object
    :param index: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The ILM phase of ``index``, or None if it cannot be determined
    """
    ilm = get_ilm(client, index)
    try:
        return ilm['indices'][index]['phase']
    except KeyError:
        # Not ILM-affiliated; it may still be a mounted searchable snapshot
        # sitting in the cold or frozen tier
        settings = get_settings(client, index)[index]['settings']['index']
        if 'store' in settings and settings['store']['type'] == 'snapshot':
            return get_phase_from_tier_pref(settings)
        return None
def get_phase_from_tier_pref(
    idx_settings: t.Dict,
) -> t.Union[t.Literal['frozen', 'cold'], None]:
    """
    Check the index's ``_tier_preference`` as an indicator which phase the index is in

    :param idx_settings: The results from a
        get_settings(index=idx)[idx]['settings']['index'] call

    :returns: The ILM phase based on the index settings, or None
    """
    try:
        tiers = idx_settings['routing']['allocation']['include']['_tier_preference']
    except KeyError:
        # No tier preference recorded at all
        return None
    if tiers == 'data_frozen':
        return 'frozen'
    return 'cold' if 'data_cold' in tiers.split(',') else None
def ilm_move(
    client: 'Elasticsearch', name: str, current_step: t.Dict, next_step: t.Dict
) -> None:
    """Move index 'name' from the current step to the next step

    :param client: A client connection object
    :param name: The index name
    :param current_step: The step the index is currently in
    :param next_step: The step to move the index to

    :raises BadClientResult: If the move fails for any reason
    """
    try:
        client.ilm.move_to_step(
            index=name, current_step=current_step, next_step=next_step
        )
    except Exception as exc:  # Deliberately broad: any failure is fatal here
        msg = f'Unable to move index {name} to ILM next step: {next_step}. Error: {exc}'
        logger.critical(msg)
        raise BadClientResult(msg, exc)
def modify_data_stream(
    client: 'Elasticsearch', actions: t.Sequence[t.Mapping[str, t.Any]]
) -> None:
    """Modify a data_stream using the contents of actions

    :param client: A client connection object
    :param actions: The actions to take

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type actions: dict

    :raises MissingIndex: If the modification request is rejected
    """
    try:
        client.indices.modify_data_stream(actions=actions)
    except BadRequestError as err:
        logger.error(
            "Unable to modify data_stream using actions='%s'. ERROR: %s", actions, err
        )
        raise MissingIndex(
            'Missing either data_stream or index', err, f'actions: {actions}'
        )
def report_segment_count(client: 'Elasticsearch', index: str) -> str:
    """
    Report the count of segments from index

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: Formatted message describing shard count and segment count for index
    :raises BadClientResult: If the _cat/shards call fails
    """
    shardcount = 0
    segmentcount = 0
    try:
        output = client.cat.shards(
            index=index, format='json', h=['index', 'shard', 'prirep', 'sc']
        )
    except Exception as exc:
        logger.error('Exception: %s', exc)
        raise BadClientResult('Unable to get cat shards output', exc)
    for shard in output:
        if shard['prirep'] == 'r':  # type: ignore
            # Skip replica shards; only primaries are counted
            continue
        if index != shard['index']:  # type: ignore
            logger.warning(
                'Index name %s does not match what was returned by the _cat API: %s',
                index,
                shard['index'],  # type: ignore
            )
        shardcount += 1
        segmentcount += int(shard['sc'])  # type: ignore
        logger.debug(
            'Index %s, shard %s has %s segments',
            index,
            shard["shard"],  # type: ignore
            shard["sc"],  # type: ignore
        )
    # Guard against ZeroDivisionError when the _cat API reported no primary
    # shards for this index (previously crashed)
    average = segmentcount / shardcount if shardcount else 0.0
    return (
        f'index {index} has {shardcount} shards and a total of {segmentcount} '
        f'segments, averaging {average} segments per shard'
    )
def get_settings(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the settings for an index

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The settings object for the named index
    :raises MissingIndex: If the index does not exist
    """
    logger.debug('Getting settings for index: %s', index)
    try:
        result = dict(
            client.indices.get_settings(
                index=index, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(result)
    logger.debug('Index settings collected.')
    return result
def put_settings(client: 'Elasticsearch', index: str, settings: dict) -> None:
    """Apply ``settings`` to ``index``

    (Docstring previously copy-pasted from modify_data_stream.)

    :param client: A client connection object
    :param index: The index to modify
    :param settings: The index settings to apply

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type settings: dict

    :raises MissingIndex: If the index does not exist
    :raises BadClientResult: If the settings are rejected
    """
    try:
        client.indices.put_settings(index=index, settings=settings)
    except NotFoundError as exc:
        logger.error("Index '%s' not found: %s", index, exc)
        raise MissingIndex('Index not found', exc, index)
    except BadRequestError as exc:
        logger.error("Bad settings: %s. ERROR: %s", settings, exc)
        raise BadClientResult(f'Invalid settings: {settings}', exc)
def get_progress_doc(
    client: 'Elasticsearch',
    index_name: str,
    job_id: str,
    task_id: str,
    stepname: str = '',
) -> t.Dict:
    """Get a task tracking doc

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job name string for the present redaction run
    :param task_id: The task_id string of the task we are searching for
    :param stepname: [Optional] The step name string of the step we are searching for

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str
    :type task_id: str
    :type stepname: str

    :returns: The progress tracking document from the progress/status tracking index
        for the task or step
    :raises MissingIndex: If the tracking index is missing
    :raises FatalError: If more than one tracking doc matches
    :raises MissingDocument: If no tracking doc matches
    """
    # Parent/child query scoped to the job, narrowed by the filters below
    query: t.Dict = {
        "bool": {
            "must": {"parent_id": {"type": "task", "id": job_id}},
            "filter": [],
        }
    }
    filters = [
        {"term": {"task": task_id}},
        {"term": {"job": job_id}},
    ]
    if stepname:
        stub = f'Step: {stepname} of Task: {task_id} of Job: {job_id}'
        # Step docs are identified by their step field
        filters.append({"term": {"step": stepname}})
    else:
        stub = f'Task: {task_id} of Job: {job_id}'
        # Task progress docs must not have a step field at all
        query['bool']['must_not'] = {"exists": {"field": "step"}}
    logger.info('Tracking progress for %s', stub)
    query['bool']['filter'] = filters
    try:
        result = do_search(client, index_pattern=index_name, query=query)
    except NotFoundError as err:
        msg = f'Tracking index {index_name} is missing'
        logger.critical(msg)
        raise MissingIndex(msg, err, index_name)
    hit_total = result['hits']['total']['value']
    # Edge case first: multiple matches should be impossible
    if hit_total > 1:
        msg = f'Tracking document for {stub} is not unique. This should never happen.'
        logger.critical(msg)
        raise FatalError(msg, ValueError())
    # After the > 1 test, anything other than exactly 1 means zero hits
    if hit_total != 1:
        msg = f'Tracking document for {stub} does not exist'
        missing = f'A document with step: {stepname}, task: {task_id}, job: {job_id}'
        logger.debug(msg)
        raise MissingDocument(msg, Exception(), missing)
    # There can be only one...
    return result['hits']['hits'][0]
def get_tracking_doc(client: 'Elasticsearch', index_name: str, job_id: str) -> t.Dict:
    """Get the progress/status tracking doc for the provided job_id

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job_id string for the present redaction run

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str

    :returns: The tracking document from the progress/status tracking index
    :raises MissingIndex: If the tracking index does not exist
    :raises MissingDocument: If no tracking doc exists for ``job_id``
    """
    if not index_exists(client, index_name):
        msg = f'Tracking index {index_name} is missing'
        logger.critical(msg)
        raise MissingIndex(msg, Exception(), index_name)
    try:
        response = dict(client.get(index=index_name, id=job_id))
    except NotFoundError as err:
        msg = f'Tracking document for job_id {job_id} does not exist'
        logger.debug(msg)
        raise MissingDocument(msg, err, job_id)
    return response['_source']
def index_exists(client: 'Elasticsearch', index_name: str) -> 'HeadApiResponse':
    """Report whether index ``index_name`` exists

    :param client: A client connection object
    :param index_name: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str

    :returns: ``HeadApiResponse(True)`` if ``index_name`` exists, otherwise
        ``HeadApiResponse(False)``
    """
    return client.indices.exists(
        index=index_name, expand_wildcards=['open', 'hidden']
    )
def job_exists(
    client: 'Elasticsearch', index_name: str, job_id: str
) -> 'HeadApiResponse':
    """Report whether a document exists for the present job_id

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job_id string for the present redaction run

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str

    :returns: ``HeadApiResponse(True)`` if a document with ``job_id`` exists in
        ``index_name``, otherwise ``HeadApiResponse(False)``
    """
    return client.exists(index=index_name, id=job_id)
def mount_index(var: 'DotMap') -> None:
    """Mount index as a searchable snapshot

    Deletes any pre-existing index with the mount name, performs the mount,
    then waits for the mounted index to reach "green" health.

    :param var: A collection of variables from
        :py:attr:`~.es_pii_tool.redacters.snapshot.RedactSnapshot.var`

    :type var: DotMap

    :raises BadClientResult: If the mount call fails
    :raises FatalError: If the mounted index never reaches green health
    """
    response = {}
    msg = (
        f'Mounting {var.redaction_target} renamed as {var.mount_name} '
        f'from repository: {var.repository}, snapshot: {var.new_snap_name} '
        f'with storage={var.storage}'
    )
    logger.debug(msg)
    # Delete any leftover index occupying the mount name before remounting
    while index_exists(var.client, var.mount_name):
        logger.warning('Index %s exists. Deleting before remounting', var.mount_name)
        delete_index(var.client, var.mount_name)
        time.sleep(3.0)
    try:
        response = dict(
            var.client.searchable_snapshots.mount(
                repository=var.repository,
                snapshot=var.new_snap_name,
                index=var.redaction_target,
                renamed_index=var.mount_name,
                storage=var.storage,
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Attempt to mount index '%s' failed: %s", var.mount_name, err)
        logger.debug(response)
        raise BadClientResult('Error when mount index attempted', err)
    logger.info('Ensuring searchable snapshot mount is in "green" health state...')
    try:
        # Fixed 30s timeout here (not TIMEOUT_VALUE): mounts are expected fast
        es_waiter(
            var.client,
            Index,
            action='mount',
            index=var.mount_name,
            pause=PAUSE_VALUE,
            timeout=30.0,
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to mount index from snapshot', exc)
    # Log message typo fixed: 'succesfully' -> 'successfully'
    logger.info("Index '%s' mounted from snapshot successfully", var.mount_name)
def resolve_index(client: 'Elasticsearch', index: str) -> t.Dict:
    """Resolve an index

    :param client: A client connection object
    :param index: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The return value from
        :py:meth:`~.elasticsearch.Elasticsearch.IndicesClient.resolve_index`
    :rtype: dict
    :raises MissingIndex: If the index cannot be resolved
    """
    logger.debug('Resolving index: %s', index)
    try:
        result = dict(
            client.indices.resolve_index(
                name=index, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(result)
    logger.debug('Index resolved.')
    return result
def restore_index(
    client: 'Elasticsearch',
    repo_name: str,
    snap_name: str,
    index_name: str,
    replacement: str,
    re_pattern: str = '(.+)',
    index_settings: t.Union[t.Dict, None] = None,
) -> None:
    """Restore an index

    :param client: A client connection object
    :param repo_name: The repository name
    :param snap_name: The snapshot name
    :param index_name: The index name as it appears in the snapshot metadata
    :param replacement: The name or substitution string to use as the restored index
        name
    :param re_pattern: The optional rename pattern for use with ``replacement``
    :param index_settings: Any settings to apply to the restored index, such as
        _tier_preference

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repo_name: str
    :type snap_name: str
    :type index_name: str
    :type replacement: str
    :type re_pattern: str
    :type index_settings: dict

    :raises BadClientResult: If the restore call or the restore wait fails
    :raises ValueMismatch: If the restored index ends up in "red" health
    """
    msg = (
        f"repository={repo_name}, snapshot={snap_name}, indices={index_name},"
        f"include_aliases=False,"
        f"ignore_index_settings=["
        f" 'index.lifecycle.name', 'index.lifecycle.rollover_alias',"
        f" 'index.routing.allocation.include._tier_preference'],"
        f"index_settings={index_settings},"
        f"rename_pattern={re_pattern},"
        f"rename_replacement={replacement},"
        f"wait_for_completion=False"
    )
    logger.debug('RESTORE settings: %s', msg)
    try:
        response = client.snapshot.restore(
            repository=repo_name,
            snapshot=snap_name,
            indices=index_name,
            include_aliases=False,
            # ILM and tier-preference settings are dropped so the restored
            # index is not immediately re-managed or re-allocated
            ignore_index_settings=[
                'index.lifecycle.name',
                'index.lifecycle.rollover_alias',
                'index.routing.allocation.include._tier_preference',
            ],
            index_settings=index_settings,
            rename_pattern=re_pattern,
            rename_replacement=replacement,
            wait_for_completion=False,
        )
        logger.debug('Response = %s', response)
        logger.info('Checking if restoration completed...')
        try:
            es_waiter(client, Restore, index_list=[replacement], **WAITKW)
        except BadClientResult as bad:
            logger.error('Exception: %s', bad)
            raise BadClientResult('Failed to restore index from snapshot', bad)
        msg = f'Restoration of index {index_name} as {replacement} complete'
        logger.info(msg)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = (
            f'Restoration of index {index_name} as {replacement} yielded an error: '
            f'{err}'
        )
        logger.error(msg)
        raise BadClientResult(msg, err)
    # verify index is green
    logger.info('Ensuring restored index is in "green" health state...')
    res = dict(client.cluster.health(index=replacement, filter_path='status'))
    logger.debug('res = %s', res)
    if res['status'] == 'red':
        msg = f'Restored index {replacement} is not in a healthy state'
        logger.error(msg)
        raise ValueMismatch(msg, 'index health is "red"', 'green or yellow')
def redact_from_index(client: 'Elasticsearch', index_name: str, config: t.Dict) -> None:
    """Redact data from an index using a painless script.

    Collect the task_id and wait for the reindexing job to complete before returning

    :param client: A client connection object
    :param index_name: The index to act on
    :param config: The config block being iterated. Contains ``query``, ``message``,
        and ``fields``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type config: dict

    :raises FatalError: If the update_by_query call or its task wait fails
    """
    logger.debug('Begin redaction...')
    logger.info('Before update by query, %s', report_segment_count(client, index_name))
    logger.debug('Updating and redacting data...')
    painless = build_script(config['message'], config['fields'])
    try:
        response = dict(
            client.update_by_query(
                index=index_name,
                script=painless,
                query=config['query'],
                wait_for_completion=False,
                expand_wildcards=['open', 'hidden'],
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.critical('update_by_query yielded an error: %s', err)
        raise FatalError('update_by_query API call failed', err)
    logger.debug('Checking update by query status...')
    logger.debug('response = %s', response)
    try:
        es_waiter(
            client, Task, action='update_by_query', task_id=response['task'], **WAITKW
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to complete update by query', exc)
    logger.info('After update by query, %s', report_segment_count(client, index_name))
    logger.debug('Update by query completed.')
def remove_ilm_policy(client: 'Elasticsearch', index: str) -> t.Dict:
    """Remove any ILM policy associated with index

    :param client: A client connection object
    :param index: The index

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The response, e.g. ``{'has_failures': False, 'failed_indexes': []}``
    :raises MissingIndex: If the index does not exist
    """
    try:
        result = dict(client.ilm.remove_policy(index=index))
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug(result)
    return result
def take_snapshot(
    client: 'Elasticsearch', repo_name: str, snap_name: str, index_name: str
) -> None:
    """
    Take snapshot of index and wait for it to complete

    :param client: A client connection object
    :param repo_name: The repository name
    :param snap_name: The snapshot name
    :param index_name: The name of the index to snapshot

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repo_name: str
    :type snap_name: str
    :type index_name: str

    :raises BadClientResult: If the snapshot creation call fails
    :raises FatalError: If waiting on the snapshot fails
    """
    logger.info('Creating new snapshot...')
    try:
        result = dict(
            client.snapshot.create(
                repository=repo_name,
                snapshot=snap_name,
                indices=index_name,
                wait_for_completion=False,
            )
        )
        logger.debug('Snapshot response: %s', result)
    except (ApiError, NotFoundError, TransportError, BadRequestError, KeyError) as err:
        msg = f'Creation of snapshot "{snap_name}" resulted in an error: {err}'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.info('Checking on status of snapshot...')
    try:
        es_waiter(client, Snapshot, snapshot=snap_name, repository=repo_name, **WAITKW)
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to complete index snapshot', exc)
    msg = (
        f'{index_name}: Snapshot to repository {repo_name} in snapshot {snap_name} '
        f'succeeded.'
    )
    logger.info(msg)
def update_doc(
    client: 'Elasticsearch', index: str, doc_id: str, doc: t.Dict, routing: int = 0
) -> None:
    """Upsert a document in ``index`` at ``doc_id`` with the values of ``doc``

    :param client: A client connection object
    :param index: The index to write to
    :param doc_id: The document doc_id to update
    :param doc: The contents of the document
    :param routing: Because our tracking doc is using parent/child relationships, we
        need to route. We use an integer, but the API calls expect a string, so we
        manually cast this value in the API call as one.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type doc_id: str
    :type doc: dict
    :type routing: int

    :raises BadClientResult: If the update or index call fails
    """
    try:
        if not doc_id:
            # No id: create a brand-new document
            logger.debug('No value for document id. Creating new document.')
            client.index(index=index, document=doc, routing=str(routing), refresh=True)
        else:
            client.update(
                index=index,
                id=doc_id,
                doc=doc,
                doc_as_upsert=True,
                routing=str(routing),
                refresh=True,
            )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Error updating document: {err.args[0]}'
        logger.error(msg)
        raise BadClientResult(msg, err)
def verify_index(client: 'Elasticsearch', index: str) -> bool:
    """Verify the index exists and is an index, not an alias

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: True only when exactly one settings entry is returned and its
        key matches ``index``
    """
    logger.debug('Verifying index: %s', index)
    try:
        response = dict(
            client.indices.get_settings(
                index=index, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        # Bug fix: previously fell through with an empty response and crashed
        # with IndexError on list(response.keys())[0]
        return False
    logger.debug(response)
    keys = list(response.keys())
    if not keys:
        # Empty response: nothing matched; cannot be a valid index
        return False
    if len(keys) > 1:
        # We have more than one key, that means we hit an alias
        logger.error('Index %s is one member of an alias.', index)
        return False
    if keys[0] != index:
        # There's a 1 to 1 alias, but it is not the index name
        logger.error('Index %s is an alias.', index)
        return False
    return True