Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/pii/lib/python3.12/site-packages/es_pii_tool/helpers/elastic_api.py: 66%
349 statements
coverage.py v7.5.0, created at 2024-10-01 16:39 -0600
1"""Functions making Elasticsearch API calls"""
3from os import getenv
4import typing as t
5import logging
6from elasticsearch8.exceptions import (
7 ApiError,
8 NotFoundError,
9 TransportError,
10 BadRequestError,
11)
12from es_wait import Index, Restore, Snapshot, Task
13from es_pii_tool.defaults import (
14 PAUSE_DEFAULT,
15 PAUSE_ENVVAR,
16 TIMEOUT_DEFAULT,
17 TIMEOUT_ENVVAR,
18)
19from es_pii_tool.exceptions import (
20 BadClientResult,
21 FatalError,
22 MissingDocument,
23 MissingError,
24 MissingIndex,
25 ValueMismatch,
26)
27from es_pii_tool.helpers.utils import build_script, check_fields, es_waiter
29if t.TYPE_CHECKING:
30 from dotmap import DotMap # type: ignore
31 from elasticsearch8 import Elasticsearch
32 from elastic_transport import HeadApiResponse
34PAUSE_VALUE = float(getenv(PAUSE_ENVVAR, default=PAUSE_DEFAULT))
35TIMEOUT_VALUE = float(getenv(TIMEOUT_ENVVAR, default=TIMEOUT_DEFAULT))
36WAITKW = {'pause': PAUSE_VALUE, 'timeout': TIMEOUT_VALUE}
38logger = logging.getLogger(__name__)
40# pylint: disable=R0913,W0707
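
# Polling behaviour for the waiters below is driven by the two environment
# variables named by PAUSE_ENVVAR and TIMEOUT_ENVVAR (both importable from
# es_pii_tool.defaults). Illustrative sketch only; the values are examples,
# and they must be set before this module is imported because the defaults
# are read at import time:
#
#     import os
#     os.environ[PAUSE_ENVVAR] = '2.5'      # seconds between wait polls
#     os.environ[TIMEOUT_ENVVAR] = '600.0'  # give up waiting after 10 minutes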


def assign_alias(client: 'Elasticsearch', index_name: str, alias_name: str) -> None:
    """Assign index to alias(es)"""
    try:
        response = client.indices.put_alias(index=index_name, name=alias_name)
        logger.info(
            "Index '%s' was successfully added to alias '%s'", index_name, alias_name
        )
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Attempt to assign index "{index_name}" to alias "{alias_name}" failed'
        logger.critical(msg)
        raise BadClientResult(msg, err)


def check_index(client: 'Elasticsearch', index_name: str, job_config: t.Dict) -> None:
    """Check that fields targeted by the job's query were actually redacted"""
    logger.info('Making a quick check on redacted index docs...')
    result = do_search(client, index_name, job_config['query'])
    if result['hits']['total']['value'] == 0:
        logger.warning(
            'Query returned no results, assuming it only returns docs '
            'to be redacted and not already redacted...'
        )
        return
    success = check_fields(result, job_config)
    if not success:
        msg = 'One or more fields were not redacted. Check the logs'
        logger.error(msg)
        raise ValueMismatch(msg, 'count of fields matching query is not 0', '0')


def clear_cache(client: 'Elasticsearch', index_name: str) -> None:
    """Clear the cache for the named index

    :param client: A client connection object
    :param index_name: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str

    :returns: No return value
    :rtype: None
    """
    response = {}
    logger.info('Clearing cache data for %s...', index_name)
    try:
        response = dict(
            client.indices.clear_cache(
                index=index_name, expand_wildcards=['open', 'hidden']
            )
        )
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error('clear_cache API call resulted in an error: %s', err)


def close_index(client: 'Elasticsearch', name: str) -> None:
    """Close an index

    :param client: A client connection object
    :param name: The index name to close

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type name: str
    """
    try:
        response = client.indices.close(index=name, expand_wildcards=['open', 'hidden'])
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", name, err)
        raise MissingIndex(f'Index "{name}" not found', err, name)


def create_index(
    client: 'Elasticsearch',
    name: str,
    mappings: t.Union[t.Dict, None] = None,
    settings: t.Union[t.Dict, None] = None,
) -> None:
    """Create an Elasticsearch index with associated mappings and settings

    :param client: A client connection object
    :param name: The index name
    :param mappings: The index mappings
    :param settings: The index settings

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type name: str
    :type mappings: dict
    :type settings: dict
    """
    if index_exists(client, name):
        logger.info('Index %s already exists', name)
        return
    try:
        response = client.indices.create(
            index=name, settings=settings, mappings=mappings
        )
        logger.debug(response)
    except BadRequestError as err:
        logger.error("Index: '%s' already exists. Error: %s", name, err)
        raise BadClientResult(f'Index "{name}" already exists', err)
    except (ApiError, TransportError) as err:
        logger.error("Unknown error trying to create index: '%s'. Error: %s", name, err)
        raise BadClientResult(f'Unknown error trying to create index: {name}', err)


def delete_index(client: 'Elasticsearch', name: str) -> None:
    """Delete an index

    :param client: A client connection object
    :param name: The index name to delete

    :type name: str
    """
    try:
        response = client.indices.delete(
            index=name, expand_wildcards=['open', 'hidden']
        )
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        # logger.error("Index: '%s' not found. Error: %s", name, err)
        raise MissingIndex(f'Index "{name}" not found', err, name)


def do_search(
    client: 'Elasticsearch', index_pattern: str, query: t.Dict, size: int = 10
) -> t.Dict:
    """Return the search result of ``query`` against ``index_pattern``

    :param client: A client connection object
    :param index_pattern: A single index name, a csv list of indices, or other pattern
    :param query: An Elasticsearch DSL search query
    :param size: Maximum number of results to return

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_pattern: str
    :type query: dict
    :type size: int

    :returns: The search response as a dict
    """
    try:
        response = dict(
            client.search(
                index=index_pattern,
                query=query,
                size=size,
                expand_wildcards=['open', 'hidden'],
            )
        )
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Attempt to collect search results yielded an exception: {err}'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    return response
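
# Illustrative usage only (not part of the original module): ``query`` is the
# body of the DSL ``query`` clause, not a full request. Assuming ``es`` is a
# connected Elasticsearch client:
#
#     result = do_search(es, 'my-index-*', {'match': {'status': 'active'}}, size=25)
#     for hit in result['hits']['hits']:
#         print(hit['_id'], hit['_source'])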


def forcemerge_index(
    client: 'Elasticsearch',
    index: t.Union[str, None] = None,
    max_num_segments: int = 1,
    only_expunge_deletes: bool = False,
) -> None:
    """
    Force Merge an index

    :param client: A client connection object
    :param index: A single index name
    :param max_num_segments: The maximum number of segments per shard after a
        force merge
    :param only_expunge_deletes: Only expunge deleted docs during force merging.
        If True, ignores max_num_segments.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type max_num_segments: int
    :type only_expunge_deletes: bool
    """
    kwargs = {'index': index, 'wait_for_completion': False}
    if only_expunge_deletes:
        kwargs.update({'only_expunge_deletes': only_expunge_deletes})
    else:
        kwargs.update({'max_num_segments': max_num_segments})  # type: ignore
    try:
        response = dict(client.indices.forcemerge(**kwargs))  # type: ignore
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)  # type: ignore
    logger.info('Waiting for forcemerge to complete...')
    # task_check = Task(
    #     client,
    #     action='forcemerge',
    #     task_id=response['task'],
    #     pause=PAUSE_VALUE,
    #     timeout=TIMEOUT_VALUE,
    # )
    try:
        # task_check.wait()
        es_waiter(client, Task, action='forcemerge', task_id=response['task'], **WAITKW)
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to forcemerge', exc)
    logger.info('Forcemerge completed.')
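
# Illustrative usage only (not part of the original module), assuming ``es`` is
# a connected Elasticsearch client. The two modes are mutually exclusive: when
# only_expunge_deletes is True, max_num_segments is ignored:
#
#     forcemerge_index(es, index='redacted-index', max_num_segments=1)
#     forcemerge_index(es, index='redacted-index', only_expunge_deletes=True)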


def generic_get(func: t.Callable, **kwargs) -> t.Dict:
    """Generic, reusable client request getter"""
    try:
        response = dict(func(**kwargs))
        logger.debug(response)
    except NotFoundError as nferr:
        raise MissingError('Generic Get MissingError', nferr, nferr.info)
    except (ApiError, TransportError, BadRequestError) as err:
        raise BadClientResult('Generic Get BadClientResult Failure', err)
    return response
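
# Illustrative usage only (not part of the original module): ``func`` is any
# bound client method, and keyword arguments are passed straight through.
# Assuming ``es`` is a connected Elasticsearch client:
#
#     settings = generic_get(es.indices.get_settings, index='my-index')
#     aliases = generic_get(es.indices.get_alias, index='my-index')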


def get_hits(client: 'Elasticsearch', index: str, query: t.Dict) -> int:
    """Return the number of hits matching the query

    :param client: A client connection object
    :param index: The index or pattern to search
    :param query: The query to execute

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type query: dict

    :returns: The number of hits matching the query
    """
    result = do_search(client, index, query)
    return result['hits']['total']['value']


def get_ilm(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the ILM lifecycle settings for an index

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The ILM settings object for the named index
    """
    try:
        response = dict(client.ilm.explain_lifecycle(index=index))
        logger.debug(response)
    except NotFoundError as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    return response


def get_ilm_lifecycle(client: 'Elasticsearch', policyname: str) -> t.Dict:
    """Get the ILM lifecycle settings for a named policy

    :param client: A client connection object
    :param policyname: The ILM policy name to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type policyname: str

    :returns: The ILM settings object for the named policy, or an empty dict if
        the policy does not exist
    """
    retval = {}
    try:
        retval = dict(client.ilm.get_lifecycle(name=policyname))
    except NotFoundError:
        logger.debug("ILM policy '%s' not found.", policyname)
    return retval


def get_index(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the info about an index

    :param client: A client connection object
    :param index: The index, csv indices, or index pattern to get

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The index information object for the named index
    """
    try:
        response = dict(
            client.indices.get(index=index, expand_wildcards=['open', 'hidden'])
        )
        logger.debug('Found indices: %s', list(response.keys()))
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    return response


def get_phase(client: 'Elasticsearch', index: str) -> t.Union[str, None]:
    """Get the index's ILM phase

    :param client: A client connection object
    :param index: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The ILM phase of ``index``
    """
    phase = None
    ilm = get_ilm(client, index)
    try:
        phase = ilm['indices'][index]['phase']
    except KeyError:  # Perhaps in cold/frozen but not ILM affiliated
        settings = get_settings(client, index)[index]['settings']['index']
        if "store" in settings:
            # Checking if it's a mounted searchable snapshot
            if settings["store"]["type"] == "snapshot":
                phase = get_phase_from_tier_pref(settings)
            else:
                phase = None
    return phase


def get_phase_from_tier_pref(
    idx_settings: t.Dict,
) -> t.Union[t.Literal['frozen', 'cold'], None]:
    """
    Check the index's ``_tier_preference`` as an indicator of which phase the
    index is in

    :param idx_settings: The results from a
        get_settings(index=idx)[idx]['settings']['index'] call

    :returns: The ILM phase based on the index settings, or None
    """
    try:
        tiers = idx_settings['routing']['allocation']['include']['_tier_preference']
    except KeyError:
        tiers = ''
    if tiers == 'data_frozen':
        return 'frozen'
    if 'data_cold' in tiers.split(','):
        return 'cold'
    return None
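
# Illustrative examples only (not part of the original module); the result is
# derived purely from the ``_tier_preference`` routing setting:
#
#     get_phase_from_tier_pref(
#         {'routing': {'allocation': {'include': {'_tier_preference': 'data_frozen'}}}}
#     )  # -> 'frozen'
#     get_phase_from_tier_pref(
#         {'routing': {'allocation': {'include': {'_tier_preference': 'data_cold,data_hot'}}}}
#     )  # -> 'cold'
#     get_phase_from_tier_pref({})  # -> None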


def ilm_move(
    client: 'Elasticsearch', name: str, current_step: t.Dict, next_step: t.Dict
) -> None:
    """Move index 'name' from the current step to the next step"""
    try:
        client.ilm.move_to_step(
            index=name, current_step=current_step, next_step=next_step
        )
    except Exception as err:
        msg = (
            f'Unable to move index {name} to ILM next step: {next_step}. '
            f'Error: {err}'
        )
        logger.critical(msg)
        raise BadClientResult(msg, err)


def modify_data_stream(
    client: 'Elasticsearch', actions: t.Sequence[t.Mapping[str, t.Any]]
) -> None:
    """Modify a data_stream using the contents of actions

    :param client: A client connection object
    :param actions: The actions to take

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type actions: dict
    """
    try:
        client.indices.modify_data_stream(actions=actions)
    except BadRequestError as exc:
        logger.error(
            "Unable to modify data_stream using actions='%s'. ERROR: %s", actions, exc
        )
        raise MissingIndex(
            'Missing either data_stream or index', exc, f'actions: {actions}'
        )


def report_segment_count(client: 'Elasticsearch', index: str) -> str:
    """
    Report the count of segments from index

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: Formatted message describing shard count and segment count for index
    """
    shardcount = 0
    segmentcount = 0
    try:
        output = client.cat.shards(
            index=index, format='json', h=['index', 'shard', 'prirep', 'sc']
        )
    except Exception as exc:
        logger.error('Exception: %s', exc)
        raise BadClientResult('Unable to get cat shards output', exc)
    for shard in output:
        if shard['prirep'] == 'r':  # type: ignore
            # Skip replica shards
            continue
        if index != shard['index']:  # type: ignore
            logger.warning(
                'Index name %s does not match what was returned by the _cat API: %s',
                index,
                shard['index'],  # type: ignore
            )
        shardcount += 1
        segmentcount += int(shard['sc'])  # type: ignore
        logger.debug(
            'Index %s, shard %s has %s segments',
            index,
            shard["shard"],  # type: ignore
            shard["sc"],  # type: ignore
        )

    return (
        f'index {index} has {shardcount} shards and a total of {segmentcount} '
        f'segments, averaging {float(segmentcount/shardcount)} segments per shard'
    )


def get_settings(client: 'Elasticsearch', index: str) -> t.Dict:
    """Get the settings for an index

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The settings object for the named index
    """
    logger.debug('Getting settings for index: %s', index)
    try:
        response = dict(
            client.indices.get_settings(
                index=index, expand_wildcards=['open', 'hidden']
            )
        )
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug('Index settings collected.')
    return response


def put_settings(client: 'Elasticsearch', index: str, settings: dict) -> None:
    """Apply ``settings`` to ``index``

    :param client: A client connection object
    :param index: The index to modify
    :param settings: The index settings to apply

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type settings: dict
    """
    try:
        client.indices.put_settings(index=index, settings=settings)
    except NotFoundError as exc:
        logger.error("Index '%s' not found: %s", index, exc)
        raise MissingIndex('Index not found', exc, index)
    except BadRequestError as exc:
        logger.error("Bad settings: %s. ERROR: %s", settings, exc)
        raise BadClientResult(f'Invalid settings: {settings}', exc)


def get_task_doc(
    client: 'Elasticsearch', index_name: str, job_id: str, task_id: str
) -> t.Dict:
    """Get a task tracking doc

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job_id string for the present redaction run
    :param task_id: The task_id string of the task we are searching for

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str
    :type task_id: str

    :returns: The task tracking document from the progress/status tracking index
    """
    query = {
        "bool": {
            "must": {"parent_id": {"type": "task", "id": job_id}},
            "filter": [{"term": {"task": task_id}}],
        }
    }
    try:
        result = do_search(client, index_pattern=index_name, query=query)
    except NotFoundError as err:
        msg = f'Tracking index {index_name} is missing'
        logger.critical(msg)
        raise MissingIndex(msg, err, index_name)
    if result['hits']['total']['value'] != 1:
        msg = f'Tracking document for job: {job_id}, task: {task_id} does not exist'
        raise MissingDocument(msg, Exception(), msg)
    return result['hits']['hits'][0]
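
# Illustrative usage only (not part of the original module). The query relies
# on a parent/child (join) mapping in the tracking index: the job document is
# the parent and each task document is a child routed to it. Assuming ``es``
# is a connected Elasticsearch client and the index/id values are examples:
#
#     task_doc = get_task_doc(es, 'redactions-tracker', 'job-2024-10-01', 'task-0001')
#     logger.debug('Task status: %s', task_doc['_source'])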


def get_tracking_doc(client: 'Elasticsearch', index_name: str, job_id: str) -> t.Dict:
    """Get the progress/status tracking doc

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job_id string for the present redaction run

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str

    :returns: The tracking document from the progress/status tracking index
    """
    if not index_exists(client, index_name):
        msg = f'Tracking index {index_name} is missing'
        logger.critical(msg)
        raise MissingIndex(msg, Exception(), index_name)
    try:
        doc = dict(client.get(index=index_name, id=job_id))
        # logger.debug('TRACKING DOC = %s', doc)
    except NotFoundError as exc:
        msg = f'Tracking document for job_id {job_id} does not exist'
        logger.debug(msg)
        raise MissingDocument(msg, exc, job_id)
    return doc['_source']


def index_exists(client: 'Elasticsearch', index_name: str) -> 'HeadApiResponse':
    """Test whether index ``index_name`` exists

    :param client: A client connection object
    :param index_name: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str

    :returns: ``HeadApiResponse(True)`` if ``index_name`` exists, otherwise
        ``HeadApiResponse(False)``
    """
    return client.indices.exists(index=index_name, expand_wildcards=['open', 'hidden'])


def job_exists(
    client: 'Elasticsearch', index_name: str, job_id: str
) -> 'HeadApiResponse':
    """Test whether a document exists for the present job_id

    :param client: A client connection object
    :param index_name: The index name
    :param job_id: The job_id string for the present redaction run

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type job_id: str

    :returns: ``HeadApiResponse(True)`` if a document with the present ``job_id``
        exists in ``index_name``, otherwise ``HeadApiResponse(False)``
    """
    return client.exists(index=index_name, id=job_id)


def mount_index(var: 'DotMap') -> None:
    """Mount index as a searchable snapshot

    :param var: A collection of variables from
        :py:attr:`~.es_pii_tool.redacters.snapshot.RedactSnapshot.var`

    :type var: DotMap
    """
    response = {}
    msg = (
        f'Mounting {var.redaction_target} renamed as {var.mount_name} '
        f'from repository: {var.repository}, snapshot: {var.new_snap_name} '
        f'with storage={var.storage}'
    )
    logger.debug(msg)
    try:
        response = dict(
            var.client.searchable_snapshots.mount(
                repository=var.repository,
                snapshot=var.new_snap_name,
                index=var.redaction_target,
                renamed_index=var.mount_name,
                storage=var.storage,
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Attempt to mount index '%s' failed: %s", var.mount_name, err)
        logger.debug(response)
        raise BadClientResult('Error when mount index attempted', err)
    logger.info('Ensuring searchable snapshot mount is in "green" health state...')
    try:
        es_waiter(
            var.client,
            Index,
            action='mount',
            index=var.mount_name,
            pause=PAUSE_VALUE,
            timeout=30.0,
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to mount index from snapshot', exc)
    logger.info("Index '%s' mounted from snapshot successfully", var.mount_name)


def resolve_index(client: 'Elasticsearch', index: str) -> t.Dict:
    """Resolve an index

    :param client: A client connection object
    :param index: The index name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The return value from
        :py:meth:`~.elasticsearch.Elasticsearch.IndicesClient.resolve_index`
    :rtype: dict
    """
    logger.debug('Resolving index: %s', index)
    try:
        response = dict(
            client.indices.resolve_index(
                name=index, expand_wildcards=['open', 'hidden']
            )
        )
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    logger.debug('Index resolved.')
    return response


def restore_index(
    client: 'Elasticsearch',
    repo_name: str,
    snap_name: str,
    index_name: str,
    replacement: str,
    re_pattern: str = '(.+)',
    index_settings: t.Union[str, None] = None,
) -> None:
    """Restore an index

    :param client: A client connection object
    :param repo_name: The repository name
    :param snap_name: The snapshot name
    :param index_name: The index name as it appears in the snapshot metadata
    :param replacement: The name or substitution string to use as the restored index
        name
    :param re_pattern: The optional rename pattern for use with ``replacement``
    :param index_settings: Any settings to apply to the restored index, such as
        _tier_preference

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repo_name: str
    :type snap_name: str
    :type index_name: str
    :type replacement: str
    :type re_pattern: str
    :type index_settings: dict
    """
    msg = (
        f"repository={repo_name}, snapshot={snap_name}, indices={index_name},"
        f"include_aliases=False,"
        f"ignore_index_settings=["
        f" 'index.lifecycle.name', 'index.lifecycle.rollover_alias',"
        f" 'index.routing.allocation.include._tier_preference'],"
        f"index_settings={index_settings},"
        f"rename_pattern={re_pattern},"
        f"rename_replacement={replacement},"
        f"wait_for_completion=False"
    )
    logger.debug('RESTORE settings: %s', msg)
    try:
        response = client.snapshot.restore(
            repository=repo_name,
            snapshot=snap_name,
            indices=index_name,
            include_aliases=False,
            ignore_index_settings=[
                'index.lifecycle.name',
                'index.lifecycle.rollover_alias',
                'index.routing.allocation.include._tier_preference',
            ],
            index_settings=index_settings,  # type: ignore
            rename_pattern=re_pattern,
            rename_replacement=replacement,
            wait_for_completion=False,
        )
        logger.debug('Response = %s', response)
        logger.info('Checking if restoration completed...')
        # restore_check = Restore(
        #     client, pause=PAUSE_VALUE, timeout=TIMEOUT_VALUE, index_list=[replacement]
        # )
        try:
            es_waiter(client, Restore, index_list=[replacement], **WAITKW)
        except BadClientResult as exc:
            logger.error('Exception: %s', exc)
            raise FatalError('Failed to restore index from snapshot', exc)
        msg = f'Restoration of index {index_name} as {replacement} complete'
        logger.info(msg)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = (
            f'Restoration of index {index_name} as {replacement} yielded an error: '
            f'{err}'
        )
        logger.error(msg)
        raise BadClientResult(msg, err)
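
# Illustrative usage only (not part of the original module), assuming ``es`` is
# a connected Elasticsearch client and the repository/snapshot/index names are
# examples. With the default re_pattern '(.+)', the whole snapshotted index
# name is captured and replaced wholesale by ``replacement``:
#
#     restore_index(
#         es,
#         repo_name='my-repo',
#         snap_name='pre-redaction-snap',
#         index_name='sensitive-index-000001',
#         replacement='restored-sensitive-index-000001',
#     )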


def redact_from_index(client: 'Elasticsearch', index_name: str, config: t.Dict) -> None:
    """Redact data from an index using a painless script.

    Collect the task_id and wait for the update by query task to complete before
    returning

    :param client: A client connection object
    :param index_name: The index to act on
    :param config: The config block being iterated. Contains ``query``, ``message``,
        and ``fields``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index_name: str
    :type config: dict
    """
    logger.debug('Begin redaction...')
    logger.info('Before update by query, %s', report_segment_count(client, index_name))
    logger.debug('Updating and redacting data...')
    script = build_script(config['message'], config['fields'])
    response = {}
    try:
        response = dict(
            client.update_by_query(
                index=index_name,
                script=script,
                query=config['query'],
                wait_for_completion=False,
                expand_wildcards=['open', 'hidden'],
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.critical('update_by_query yielded an error: %s', err)
        raise FatalError('update_by_query API call failed', err)
    logger.debug('Checking update by query status...')
    logger.debug('response = %s', response)
    # task_check = Task(client, action='update_by_query', task_id=response['task'])
    try:
        # task_check.wait()
        es_waiter(
            client, Task, action='update_by_query', task_id=response['task'], **WAITKW
        )
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to complete update by query', exc)
    logger.info('After update by query, %s', report_segment_count(client, index_name))
    logger.debug('Update by query completed.')
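
# Illustrative sketch only (not part of the original module): the shape of a
# per-iteration ``config`` block, based on the keys this function reads
# (``query``, ``message``, ``fields``). Field names and values are assumptions:
#
#     config = {
#         'query': {'match': {'user.email': 'someone@example.com'}},
#         'message': 'REDACTED',
#         'fields': ['user.email', 'user.phone'],
#     }
#     redact_from_index(es, 'sensitive-index-000001', config)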


def remove_ilm_policy(client: 'Elasticsearch', index: str) -> t.Dict:
    """Remove any ILM policy associated with index

    :param client: A client connection object
    :param index: The index

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: The response, e.g. ``{'has_failures': False, 'failed_indexes': []}``
    """
    try:
        response = dict(client.ilm.remove_policy(index=index))
        logger.debug(response)
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        raise MissingIndex(f'Index "{index}" not found', err, index)
    return response


def take_snapshot(
    client: 'Elasticsearch', repo_name: str, snap_name: str, index_name: str
) -> None:
    """
    Take snapshot of index

    :param client: A client connection object
    :param repo_name: The repository name
    :param snap_name: The snapshot name
    :param index_name: The name of the index to snapshot

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repo_name: str
    :type snap_name: str
    :type index_name: str
    """
    logger.info('Creating new snapshot...')
    response = {}
    try:
        response = dict(
            client.snapshot.create(
                repository=repo_name,
                snapshot=snap_name,
                indices=index_name,
                wait_for_completion=False,
            )
        )
        logger.debug('Snapshot response: %s', response)
    except (ApiError, NotFoundError, TransportError, BadRequestError, KeyError) as err:
        msg = f'Creation of snapshot "{snap_name}" resulted in an error: {err}'
        logger.critical(msg)
        raise BadClientResult(msg, err)
    logger.info('Checking on status of snapshot...')
    # snapshot_check = Snapshot(
    #     client, snapshot=snap_name, repository=repo_name, **WAITKW
    # )

    try:
        # snapshot_check.wait()
        es_waiter(client, Snapshot, snapshot=snap_name, repository=repo_name, **WAITKW)
    except BadClientResult as exc:
        logger.error('Exception: %s', exc)
        raise FatalError('Failed to complete index snapshot', exc)
    msg = (
        f'{index_name}: Snapshot to repository {repo_name} in snapshot {snap_name} '
        f'succeeded.'
    )
    logger.info(msg)


def update_doc(
    client: 'Elasticsearch', index: str, doc_id: str, doc: t.Dict, routing: int = 0
) -> None:
    """Upsert a document in ``index`` at ``doc_id`` with the values of ``doc``

    :param client: A client connection object
    :param index: The index to write to
    :param doc_id: The document doc_id to update
    :param doc: The contents of the document
    :param routing: Because our tracking doc is using parent/child relationships, we
        need to route. We use an integer, but the API calls expect a string, so we
        manually cast this value in the API call as one.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str
    :type doc_id: str
    :type doc: dict
    :type routing: int
    """
    try:
        if doc_id:
            _ = client.update(
                index=index,
                id=doc_id,
                doc=doc,
                doc_as_upsert=True,
                routing=str(routing),
                refresh=True,
            )
        else:
            logger.debug('No value for document id. Creating new document.')
            _ = client.index(
                index=index, document=doc, routing=str(routing), refresh=True
            )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        msg = f'Error updating document: {err.args[0]}'
        logger.error(msg)
        raise BadClientResult(msg, err)
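
# Illustrative usage only (not part of the original module); index name, id,
# and document body are examples. With a doc_id the call is an upsert; without
# one a new document is indexed with an auto-generated id:
#
#     update_doc(es, 'redactions-tracker', 'job-2024-10-01', {'status': 'completed'})
#     update_doc(es, 'redactions-tracker', '', {'status': 'started'}, routing=1)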


def verify_index(client: 'Elasticsearch', index: str) -> bool:
    """Verify the index exists and is an index, not an alias

    :param client: A client connection object
    :param index: The index to check

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type index: str

    :returns: ``True`` if ``index`` resolves to exactly one concrete index of that
        name, otherwise ``False``
    """
    logger.debug('Verifying index: %s', index)
    retval = True
    response = {}
    try:
        response = dict(
            client.indices.get_settings(
                index=index, expand_wildcards=['open', 'hidden']
            )
        )
    except (ApiError, NotFoundError, TransportError, BadRequestError) as err:
        logger.error("Index: '%s' not found. Error: %s", index, err)
        retval = False
    logger.debug(response)
    keys = list(response.keys())
    if len(keys) > 1:
        # We have more than one key, that means we hit an alias
        logger.error('Index %s is one member of an alias.', index)
        retval = False
    elif keys and keys[0] != index:
        # There's a 1 to 1 alias, but it is not the index name
        logger.error('Index %s is an alias.', index)
        retval = False
    return retval