Coverage for src/es_fieldusage/main.py: 83%
125 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-26 18:26 -0600
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-26 18:26 -0600
1"""Main app definition"""
3# pylint: disable=R0902
4import typing as t
5import logging
6from es_client.helpers.config import get_client
7from es_fieldusage.helpers import utils as u
8from es_fieldusage.exceptions import ResultNotExpected, ValueMismatch
class FieldUsage:
    """
    Collect field usage statistics for every index matching a search pattern.

    On construction, the field_usage_stats API is queried and per-field usage
    counts are summed across all shards of each index into
    ``self.usage_stats``. The ``result``/``results``/``report`` accessors
    build on that data lazily and cache their output in the ``*_data``
    attributes.
    """

    def __init__(self, configdict: t.Dict[str, t.Any], search_pattern: str) -> None:
        """
        Create the client from ``configdict`` and immediately collect usage
        stats for ``search_pattern``.

        :param configdict: Configuration passed to ``get_client``
        :param search_pattern: Index name or pattern passed to the
            field_usage_stats API
        :raises ResultNotExpected: if the field usage stats cannot be retrieved
        """
        self.logger = logging.getLogger(__name__)
        self.client = get_client(configdict=configdict)
        # index name -> {field name -> usage count summed over shards}
        self.usage_stats: t.Dict[str, t.Dict[str, int]] = {}
        # Lazy caches, filled on first access of the matching property
        self.indices_data: t.List[str] = []
        self.per_index_data: t.Dict[str, t.Dict[str, t.Any]] = {}
        self.results_data: t.Dict[str, t.Any] = {}
        self.report_data: t.Dict[str, t.Any] = {}
        self.per_index_report_data: t.Dict[str, t.Any] = {}
        self.logger.info(
            "Initializing FieldUsage with search pattern: %s", search_pattern
        )
        self.get(search_pattern)

    def get(self, search_pattern: str) -> None:
        """
        Get ``raw_data`` from the field_usage_stats API for all indices in
        ``search_pattern``. Iterate over ``raw_data`` to build
        ``self.usage_stats``.

        :param search_pattern: Index name or pattern to query
        :raises ResultNotExpected: if the API call fails for any reason
        """
        try:
            field_usage = self.client.indices.field_usage_stats(index=search_pattern)
        except Exception as exc:
            self.logger.error("Unable to get field usage: %s", exc)
            raise ResultNotExpected(f'Unable to get field usage: {exc}') from exc
        for index in list(field_usage.keys()):
            if index == '_shards':
                # Ignore this key as it is "global"
                continue
            self.usage_stats[index] = self.sum_index_stats(field_usage, index)

    def get_field_mappings(self, idx: str) -> t.Dict[str, t.Any]:
        """
        Return only the field mappings for index ``idx`` (not the entire index
        mapping).

        An index whose mapping has no ``properties`` key yields an empty
        dictionary instead of raising ``KeyError``.

        :param idx: Name of the index
        :returns: A copy of the ``properties`` section of the index mapping
        """
        mappings = self.client.indices.get_mapping(index=idx)[idx]['mappings']
        return dict(mappings.get('properties', {}))

    def populate_values(
        self, idx: str, data: t.Dict[t.Any, t.Any]
    ) -> t.Dict[t.Any, t.Any]:
        """
        Add the field usage values for ``idx`` to ``data`` and return the result.

        Dotted field names are stored under a tuple key of their segments;
        undotted names are stored under the name itself.

        :param idx: Index name; must be a key of ``self.usage_stats``
        :param data: Dictionary to update in place
        :returns: The same ``data`` object, updated
        """
        for field, count in self.usage_stats[idx].items():
            path = tuple(field.split('.')) if '.' in field else field
            data[path] = count
        return data

    def get_resultset(self, idx: str) -> t.Dict[t.Any, t.Any]:
        """
        Populate a result set with the fields in the index mapping.

        :param idx: Index name
        :returns: The converted mapping of ``idx`` overlaid with its usage
            values, or an empty dictionary if there are no usage stats for
            ``idx``
        """
        if idx not in self.usage_stats:
            return {}
        allfields = u.convert_mapping(self.get_field_mappings(idx))
        return self.populate_values(idx, allfields)

    def merge_results(self, idx: str) -> t.Dict[str, t.Any]:
        """
        Merge field usage data with the index mapping.

        :param idx: Index name
        :returns: Dictionary keyed by dotted field path
        """
        data = self.get_resultset(idx)
        retval = {}
        for path in u.iterate_paths(data):
            retval['.'.join(u.detuple(path))] = u.get_value_from_path(data, path)
        return retval

    def verify_single_index(self, index: t.Optional[str] = None) -> str:
        """
        Ensure the index count is 1 for certain methods.

        If no index is provided, and only one is in ``self.indices``, use that
        one.

        :param index: Explicit index name; returned unchanged when given
        :returns: The name of the single index to operate on
        :raises ValueMismatch: if no index is given and zero or more than one
            index was found
        """
        if index is not None:
            return index
        found = self.indices
        if not isinstance(found, list):
            # ``indices`` returns a bare string when exactly one index exists
            return found
        if len(found) > 1:
            self.logger.warning("Too many indices found: %s", found)
            msg = (
                f'Too many indices found. Indicate single index for result, '
                f'or use results for all indices. Found: {found}'
            )
            raise ValueMismatch(msg)
        if len(found) < 1:
            raise ValueMismatch('No indices found.')
        # Defensive: a one-element list resolves to its only member rather
        # than falling through and returning None.
        return found[0]

    @property
    def per_index_report(self) -> t.Dict[str, t.Any]:
        """
        Generate summary report data per index.

        For each index, fields are split into ``accessed`` (non-zero value)
        and ``unaccessed`` (value equal to 0).

        This property deliberately leaves ``self.report_data`` alone:
        pre-filling it here would make ``report`` believe its cache was
        already complete and return data without the ``accessed`` and
        ``unaccessed`` keys.
        """
        if not self.per_index_report_data:
            for idx, fields in self.results_by_index.items():
                self.per_index_report_data[idx] = {
                    'accessed': {k: v for k, v in fields.items() if v != 0},
                    'unaccessed': {k: v for k, v in fields.items() if v == 0},
                }
        return self.per_index_report_data

    @property
    def report(self) -> t.Dict[str, t.Any]:
        """
        Generate summary report data across all indices.

        :returns: Dictionary with the keys ``indices``, ``field_count``,
            ``accessed`` and ``unaccessed``
        """
        # Key check rather than emptiness: a partially filled cache must
        # still be completed.
        if 'accessed' not in self.report_data:
            totals = self.results
            self.report_data['indices'] = self.indices
            self.report_data['field_count'] = len(totals)
            self.report_data['accessed'] = {
                k: v for k, v in totals.items() if v != 0
            }
            self.report_data['unaccessed'] = {
                k: v for k, v in totals.items() if v == 0
            }
        return self.report_data

    def result(self, idx: t.Optional[str] = None) -> t.Dict[str, t.Any]:
        """
        Return a single index result as a dictionary.

        :param idx: Index name; may be omitted when only one index was found
        :raises ValueMismatch: if ``idx`` is omitted and zero or more than one
            index was found
        """
        idx = self.verify_single_index(index=idx)
        return u.sort_by_value(self.merge_results(idx))

    @property
    def results_by_index(self) -> t.Dict[str, t.Dict[str, t.Any]]:
        """
        Return all results as a dictionary, with the index name as the root key,
        and all stats for that index as the value, which is a dictionary generated
        by ``self.result()``.
        """
        if not self.per_index_data:
            found = self.indices
            # ``indices`` is a bare string when exactly one index exists
            idx_list = found if isinstance(found, list) else [found]
            for idx in idx_list:
                self.per_index_data[idx] = self.result(idx=idx)
        return self.per_index_data

    @property
    def results(self) -> t.Dict[str, t.Any]:
        """Return results for all indices found with values summed per mapping leaf"""
        if not self.results_data:
            # The summing re-orders things so it needs to be re-sorted
            summed = u.sum_dict_values(self.results_by_index)
            self.results_data = dict(u.sort_by_value(summed))
        return self.results_data

    @property
    def indices(self) -> t.Union[str, t.List[str]]:
        """
        Return all indices found.

        :returns: The index name as a string when exactly one index was
            found, otherwise the list of index names (possibly empty)
        """
        if not self.indices_data:
            self.indices_data = list(self.usage_stats.keys())
        if len(self.indices_data) == 1:
            return self.indices_data[0]
        return self.indices_data

    def sum_index_stats(
        self, field_usage: t.Dict[str, t.Any], idx: str
    ) -> t.Dict[str, int]:
        """
        Per field, sum all of the usage stats for all shards in ``idx``.

        :param field_usage: Response of the field_usage_stats API
        :param idx: Index name; must be a key of ``field_usage``
        :returns: Mapping of field name to the sum of its ``any`` counter over
            every shard
        """
        result: t.Dict[str, int] = {}
        for shard in field_usage[idx]['shards']:
            for field, stats in shard['stats']['fields'].items():
                if field in ('_id', '_source'):
                    # We don't care about these because these can be used by
                    # runtime queries
                    continue
                result[field] = result.get(field, 0) + stats['any']
        return result