Coverage for src/es_fieldusage/helpers/utils.py: 98%
73 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-26 17:59 -0600
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-26 17:59 -0600
1"""Utility helper functions"""
3import typing as t
4from collections import defaultdict
5from functools import reduce
6from itertools import chain
7from operator import getitem, itemgetter
8import click
9from es_fieldusage.exceptions import ConfigurationException
def convert_mapping(
    data: t.Dict[str, t.Any], new_dict: t.Optional[t.Dict[str, t.Any]] = None
) -> t.Dict[str, t.Any]:
    """
    Convert an Elasticsearch mapping into a dictionary more closely approximating
    the one coming from the field usage API.

    Receive the mapping dict as ``data``
    Strip out "properties" keys. They are not in the field_usage stats paths.
    Set the value at the end of each dict path to 0 (we merge counts from field
    usage later)

    Entries of ``data`` whose value is not a dict are left out of the result.

    :param data: Mapping dictionary to convert.
    :param new_dict: Optional scratch dictionary. When supplied it is mutated:
        every key of ``data`` is stored in it, mapped to the value's
        ``properties`` dict when there is one, otherwise to the value itself.
        Nested levels always use their own fresh scratch dictionary.
    :returns: A new nested dict in which every leaf is ``0``.
    """
    if new_dict is None:
        new_dict = {}
    retval: t.Dict[str, t.Any] = {}
    for key, value in data.items():
        has_children = isinstance(value, dict) and 'properties' in value
        # Record the "unwrapped" form of the entry in the scratch dict.
        new_dict[key] = value['properties'] if has_children else value
        if has_children:
            # Descend into the sub-fields; the "properties" level itself is
            # dropped from the resulting path.
            retval[key] = convert_mapping(value['properties'], new_dict={})
        elif isinstance(value, dict):
            # A dict without "properties" marks the end of a path.
            retval[key] = 0
    return retval
def detuple(path: t.List[t.Any]) -> t.List[t.Any]:
    """
    If we used a tuple to access a dict path, we fix it to be a list again here

    :param path: A path list. Only the exact shape "one element, and that
        element is a tuple" is unpacked.
    :returns: ``list(path[0])`` for that shape, otherwise ``path`` unchanged
        (the same object).
    """
    wrapped_in_tuple = len(path) == 1 and isinstance(path[0], tuple)
    return list(path[0]) if wrapped_in_tuple else path
def get_value_from_path(data: t.Dict[str, t.Any], path: t.List[t.Any]) -> t.Any:
    """
    Return value from dict ``data``. Recreate all keys from list ``path``

    Each element of ``path`` is applied in turn as a subscript, starting from
    ``data``. An empty ``path`` returns ``data`` itself. Lookup errors raised by
    the subscripting (for example ``KeyError``) are not caught here.

    :param data: The (nested) dictionary to read from.
    :param path: Keys to follow, outermost first.
    :returns: Whatever is stored at the end of ``path``.
    """
    return reduce(getitem, path, data)
def iterate_paths(
    data: t.Dict[str, t.Any], path: t.Optional[t.List[str]] = None
) -> t.Generator[t.List[str], None, None]:
    """
    Recursively extract all paths from a dictionary

    A path is the list of keys leading to a value that is not a dict. Dict
    values are descended into, so an empty nested dict contributes no path.

    :param data: The (nested) dictionary to walk.
    :param path: Keys already traversed to reach ``data``; callers normally
        leave this unset.
    :returns: A generator yielding one new list of keys per non-dict value.
    """
    if path is None:
        path = []
    for key, value in data.items():
        newpath = path + [key]
        if isinstance(value, dict):
            yield from iterate_paths(value, newpath)
        else:
            yield newpath
def output_report(search_pattern: str, report: t.Dict[str, t.Any]) -> None:
    """
    Output summary report data to command-line/console

    :param search_pattern: The search pattern to display.
    :param report: Report data. The keys read here are ``indices`` (a list, or
        any other single value), ``field_count``, ``accessed`` and
        ``unaccessed`` (the last two must provide ``.keys()``).
    """
    # Title
    click.secho('\nSummary Report', overline=True, underline=True, bold=True)
    # Search Pattern
    click.secho('\nSearch Pattern: ', nl=False)
    click.secho(search_pattern, bold=True)
    # Indices Found
    indices = report['indices']
    if not isinstance(indices, list):
        click.secho('Index Found: ', nl=False)
        click.secho(f'{indices}', bold=True)
    else:
        click.secho(f'{len(indices)} ', bold=True, nl=False)
        click.secho('Indices Found: ', nl=False)
        # Only short lists are printed in full; longer ones get a placeholder.
        if len(indices) > 3:
            click.secho('(data too big)', bold=True)
        else:
            click.secho(f'{indices}', bold=True)
    # Total Fields
    click.secho('Total Fields Found: ', nl=False)
    click.secho(report['field_count'], bold=True)
    # Accessed Fields
    click.secho('Accessed Fields: ', nl=False)
    click.secho(len(report['accessed'].keys()), bold=True)
    # Unaccessed Fields
    click.secho('Unaccessed Fields: ', nl=False)
    click.secho(len(report['unaccessed'].keys()), bold=True)
def override_settings(
    data: t.Dict[str, t.Any], new_data: t.Dict[str, t.Any]
) -> t.Dict[str, t.Any]:
    """
    Override keys in data with values matching in new_data

    Only keys already present in ``data`` are replaced; keys that exist only in
    ``new_data`` are ignored. ``data`` is modified in place.

    :param data: The settings to update.
    :param new_data: The overriding values.
    :returns: The same ``data`` object, after the update.
    :raises ConfigurationException: If ``new_data`` is not a dict.
    """
    if not isinstance(new_data, dict):
        raise ConfigurationException('new_data must be of type dict')
    for key, value in new_data.items():
        if key in data:
            data[key] = value
    return data
def passthrough(func: t.Callable) -> t.Callable:
    """
    Wrapper to make it easy to store click configuration elsewhere

    :param func: The callable to wrap.
    :returns: A two-argument callable taking a sequence of positional
        arguments and a dict of keyword arguments, which it unpacks into a
        call to ``func``.
    """
    return lambda a, k: func(*a, **k)
def sort_by_name(data: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
    """
    Sort dictionary by key alphabetically

    :param data: The dictionary to sort; it is not modified.
    :returns: A new dict whose insertion order follows the ascending order of
        the keys.
    """
    return dict(sorted(data.items(), key=itemgetter(0)))
def sort_by_value(data: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
    """
    Sort dictionary by root key value, descending

    :param data: The dictionary to sort; it is not modified. Its values must
        be mutually comparable.
    :returns: A new dict whose insertion order runs from the largest value to
        the smallest. Entries with equal values keep their original relative
        order.
    """
    return dict(sorted(data.items(), key=itemgetter(1), reverse=True))
def sum_dict_values(data: t.Dict[str, t.Dict[str, t.Any]]) -> t.Dict[str, int]:
    """
    Sum the values of data dict(s) into a new dict

    The outer keys of ``data`` are ignored. The inner dictionaries are merged
    by key, and every value is converted with ``int()`` before being added.

    :param data: A dictionary whose values are dictionaries of countable values.
    :returns: A plain dict of per-key totals, ordered by key via
        ``sort_by_name``.
    """
    # Missing keys start at 0, so totals can be accumulated unconditionally.
    result: t.DefaultDict[str, int] = defaultdict(int)
    for key, value in chain.from_iterable(d.items() for d in data.values()):
        result[key] += int(value)
    return sort_by_name(dict(result))