Coverage for src/es_fieldusage/commands.py: 42%
192 statements
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-26 18:22 -0600
« prev ^ index » next coverage.py v7.7.1, created at 2025-03-26 18:22 -0600
1"""Sub-commands for Click CLI"""
3# pylint: disable=R0913,R0914,R0917
4import typing as t
5import os
6from datetime import datetime, timezone
7import json
8import logging
9from pathlib import Path
10import click
11from es_client.helpers import config as escl
12from es_client.helpers.utils import option_wrapper
13from es_fieldusage.defaults import OPTS, FILEPATH_OVERRIDE, EPILOG
14from es_fieldusage.exceptions import FatalException
15from es_fieldusage.helpers.utils import output_report
16from es_fieldusage.main import FieldUsage
# Prefixes for the paired on/off boolean flags (e.g. --show-report/--hide-report)
SHW = dict(on='show-', off='hide-')
# Passed as ``override=`` to cli_opts by the ``file`` and ``index`` commands
TRU = dict(default=True)
# Decorator factory from es_client; applied below as ``@WRP(*escl.cli_opts(...))``
WRP = option_wrapper()
def get_per_index(field_usage: FieldUsage, per_index: bool) -> t.Dict[str, t.Any]:
    """
    Select the data set that the reporting commands iterate over

    :param field_usage: Object providing ``report`` and ``per_index_report``
    :param per_index: When True, ``field_usage.per_index_report`` is returned
        as-is. When False, the ``accessed`` and ``unaccessed`` entries of
        ``field_usage.report`` are wrapped under the single key ``all_indices``

    :rtype: dict
    :returns: The selected data set
    :raises FatalException: if reading ``per_index_report`` raises anything
    """
    logger = logging.getLogger(__name__)
    if not per_index:
        # One synthetic entry so callers can treat both modes the same way
        combined = {
            section: field_usage.report[section]
            for section in ('accessed', 'unaccessed')
        }
        return {'all_indices': combined}
    try:
        return field_usage.per_index_report
    except Exception as exc:
        logger.critical(f'Unable to get per_index_report data: {exc}')
        raise FatalException from exc
def format_delimiter(value: str) -> str:
    """
    Pad a raw delimiter for display

    :param value: The delimiter as supplied by the caller

    :rtype: str
    :returns: ``': '`` for a colon, ``' = '`` for an equals sign, and the
        value unchanged for anything else
    """
    if value == ':':
        return f'{value} '
    if value == '=':
        return f' {value} '
    return value
def header_msg(msg: str, show: bool) -> str:
    """
    Gate a header message on a flag

    :param msg: The header text
    :param show: Whether the header should be displayed

    :rtype: str
    :returns: ``msg`` when ``show`` is truthy, otherwise an empty string
    """
    return msg if show else ''
def is_docker() -> bool:
    """
    :rtype: bool
    :returns: Boolean result of whether we are running in a Docker container or not
    """
    # First marker: the /.dockerenv file exists
    if Path("/.dockerenv").is_file():
        return True
    # Second marker: the process cgroup file mentions "docker"
    cgroup = Path("/proc/self/cgroup")
    if not cgroup.is_file():
        return False
    return "docker" in cgroup.read_text(encoding="utf8")
def printout(data: t.Dict[str, t.Any], show_counts: bool, raw_delimiter: str) -> None:
    """
    Write the formatted lines for ``data`` to the console

    :param data: Mapping handed to :func:`output_generator`
    :param show_counts: Whether each line includes the value after the key
    :param raw_delimiter: Unformatted delimiter placed between key and value
    """
    lines = output_generator(data, show_counts, raw_delimiter)
    for text in lines:
        # Each generated line already ends in a newline, hence nl=False
        click.secho(text, nl=False)
def output_generator(
    data: t.Dict[str, t.Any], show_counts: bool, raw_delimiter: str
) -> t.Generator[str, None, None]:
    """
    Yield one newline-terminated line per item in ``data``

    :param data: Mapping whose keys (and optionally values) are rendered
    :param show_counts: When True each line is ``key`` + delimiter + ``value``;
        when False only the key is emitted
    :param raw_delimiter: Delimiter, passed through :func:`format_delimiter`

    :rtype: generator
    :returns: Lines ready for ``writelines`` or console output
    """
    sep = format_delimiter(raw_delimiter)
    for name, count in data.items():
        text = f'{name}{sep}{count}' if show_counts else f'{name}'
        # The newline is part of the line so that writelines() to a file
        # descriptor produces one entry per line
        yield f'{text}\n'
def override_filepath() -> t.Dict[str, str]:
    """
    Build the ``override`` dict for the ``filepath`` option

    :rtype: dict
    :returns: ``{'default': FILEPATH_OVERRIDE}`` when :func:`is_docker` reports
        a Docker environment, otherwise an empty dict
    """
    return {'default': FILEPATH_OVERRIDE} if is_docker() else {}
@click.command(epilog=EPILOG)
@WRP(*escl.cli_opts('report', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('headers', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('accessed', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('unaccessed', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('counts', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('delimiter', settings=OPTS))
@click.argument('search_pattern', type=str, nargs=1)
@click.pass_context
def stdout(
    ctx: click.Context,
    show_report: bool,
    show_headers: bool,
    show_accessed: bool,
    show_unaccessed: bool,
    show_counts: bool,
    delimiter: str,
    search_pattern: str,
) -> None:
    """
    Display field usage information on the console for SEARCH_PATTERN

    $ es-fieldusage stdout [OPTIONS] SEARCH_PATTERN

    This is powerful if you want to pipe the output through grep for only certain
    fields or patterns:

    $ es-fieldusage stdout --hide-report --hide-headers --show-unaccessed 'index-*' \
    | grep process
    """
    log = logging.getLogger(__name__)
    try:
        field_usage = FieldUsage(ctx.obj['configdict'], search_pattern)
    except Exception as exc:
        log.critical(f'Exception encountered: {exc}')
        raise FatalException from exc

    if show_report:
        output_report(search_pattern, field_usage.report)

    # (report key, enabled flag, header text), emitted in this order
    sections = (
        ('accessed', show_accessed, '\nAccessed Fields (in descending frequency):'),
        ('unaccessed', show_unaccessed, '\nUnaccessed Fields'),
    )
    for key, enabled, title in sections:
        if not enabled:
            continue
        # header_msg() blanks the title when headers are hidden; secho is
        # still called, with the over/underline styling switched off too
        click.secho(
            header_msg(title, show_headers),
            overline=show_headers,
            underline=show_headers,
            bold=True,
        )
        printout(field_usage.report[key], show_counts, delimiter)
@click.command(epilog=EPILOG)
@WRP(*escl.cli_opts('report', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('accessed', settings=OPTS, onoff=SHW, override=TRU))
@WRP(*escl.cli_opts('unaccessed', settings=OPTS, onoff=SHW, override=TRU))
@WRP(*escl.cli_opts('counts', settings=OPTS, onoff=SHW, override=TRU))
@WRP(*escl.cli_opts('index', settings=OPTS, onoff={'on': 'per-', 'off': 'not-per-'}))
@WRP(*escl.cli_opts('filepath', settings=OPTS, override=override_filepath()))
@WRP(*escl.cli_opts('prefix', settings=OPTS))
@WRP(*escl.cli_opts('suffix', settings=OPTS))
@WRP(*escl.cli_opts('delimiter', settings=OPTS))
@click.argument('search_pattern', type=str, nargs=1)
@click.pass_context
def file(
    ctx: click.Context,
    show_report: bool,
    show_accessed: bool,
    show_unaccessed: bool,
    show_counts: bool,
    per_index: bool,
    filepath: str,
    prefix: str,
    suffix: str,
    delimiter: str,
    search_pattern: str,
) -> None:
    """
    Write field usage information to file for SEARCH_PATTERN

    $ es_fieldusage file [OPTIONS] SEARCH_PATTERN

    When writing to file, the filename will be {prefix}-{INDEXNAME}.{suffix}
    where INDEXNAME will be the name of the index if the --per-index option is
    used, or 'all_indices' if not.

    This allows you to write to one file per index automatically, should that
    be your desire.
    """
    logger = logging.getLogger(__name__)
    try:
        field_usage = FieldUsage(ctx.obj['configdict'], search_pattern)
    except Exception as exc:
        logger.critical(f'Exception encountered: {exc}')
        raise FatalException from exc
    if show_report:
        output_report(search_pattern, field_usage.report)
        click.secho()

    all_data = get_per_index(field_usage, per_index)

    # Sections the user asked for, always written in this order
    selected = [
        key
        for key, wanted in (
            ('accessed', show_accessed),
            ('unaccessed', show_unaccessed),
        )
        if wanted
    ]
    as_json = suffix == 'json'

    files_written = []
    for idx, sections in all_data.items():
        fname = f'{prefix}-{idx}.{suffix}'
        filename = os.path.join(filepath, fname)

        # Mode 'w' truncates any previous file, so old data is never appended
        # to and no separate exists()/remove() step is needed. The file is
        # opened once per index, so every name in files_written is a file that
        # was really created, even when both sections are hidden.
        with open(filename, 'w', encoding='utf-8') as fdesc:
            if as_json:
                # JSON output is a single flat object merging the selected
                # sections; show_counts and delimiter do not apply here
                output = {}
                for key in selected:
                    output.update(sections[key])
                json.dump(output, fdesc, indent=2)
                fdesc.write('\n')
            else:
                for key in selected:
                    fdesc.writelines(
                        output_generator(sections[key], show_counts, delimiter)
                    )
        files_written.append(fname)

    click.secho('Number of files written: ', nl=False)
    click.secho(len(files_written), bold=True)
    click.secho('Filenames: ', nl=False)
    if len(files_written) > 3:
        # Keep the summary short: show only the first three names
        click.secho(files_written[0:3], bold=True, nl=False)
        click.secho(' ... (too many to show)')
    else:
        click.secho(files_written, bold=True)
@click.command(epilog=EPILOG)
@WRP(*escl.cli_opts('report', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('accessed', settings=OPTS, onoff=SHW, override=TRU))
@WRP(*escl.cli_opts('unaccessed', settings=OPTS, onoff=SHW, override=TRU))
@WRP(*escl.cli_opts('index', settings=OPTS, onoff={'on': 'per-', 'off': 'not-per-'}))
@WRP(*escl.cli_opts('indexname', settings=OPTS))
@click.argument('search_pattern', type=str, nargs=1)
@click.pass_context
def index(
    ctx: click.Context,
    show_report: bool,
    show_accessed: bool,
    show_unaccessed: bool,
    per_index: bool,
    indexname: str,
    search_pattern: str,
) -> None:
    """
    Write field usage information to file for SEARCH_PATTERN

    $ es_fieldusage index [OPTIONS] SEARCH_PATTERN

    This will write a document per fieldname per index found in SEARCH_PATTERN
    to INDEXNAME, where the JSON structure is:

    {
      "index": SOURCEINDEXNAME,
      "field": {
        "name": "FIELDNAME",
        "count": COUNT
      }
    }
    """
    logger = logging.getLogger(__name__)
    logger.debug(f'indexname = {indexname}')
    # Second-resolution UTC timestamp with a fixed ".000Z" suffix. strftime is
    # used instead of splitting isoformat() on '.', because isoformat() omits
    # the fractional part when microsecond == 0, which would leave the
    # "+00:00" offset in front of the suffix.
    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.000Z')
    try:
        field_usage = FieldUsage(ctx.obj['configdict'], search_pattern)
    except Exception as exc:
        logger.critical(f'Exception encountered: {exc}')
        raise FatalException from exc
    if show_report:
        output_report(search_pattern, field_usage.report)
        click.secho()

    all_data = get_per_index(field_usage, per_index)

    # NOTE(review): indexname is only logged so far; the documents are dumped
    # to a file named 'testing' in the current working directory (see the
    # TESTING markers) rather than being sent to an index.
    # TESTING
    filename = os.path.join(os.getcwd(), 'testing')
    # END TESTING

    # Sections the user asked for, always emitted in this order
    selected = [
        key
        for key, wanted in (
            ('accessed', show_accessed),
            ('unaccessed', show_unaccessed),
        )
        if wanted
    ]
    # One document per field, per selected section, per source index
    output = [
        {
            '@timestamp': timestamp,
            'index': idx,
            'field': {'name': fieldname, 'count': value},
        }
        for idx, sections in all_data.items()
        for key in selected
        for fieldname, value in sections[key].items()
    ]

    # TESTING
    # Mode 'w' truncates any previous file, so old data is never appended to
    with open(filename, 'w', encoding='utf-8') as fdesc:
        json.dump(output, fdesc, indent=2)
        fdesc.write('\n')
    # END TESTING
@click.command(epilog=EPILOG)
@click.argument('search_pattern', type=str, nargs=1)
@click.pass_context
def show_indices(ctx: click.Context, search_pattern: str) -> None:
    """
    Show indices on the console matching SEARCH_PATTERN

    $ es-fieldusage show_indices SEARCH_PATTERN

    This is included as a way to ensure you are seeing the indices you expect
    before using the file or stdout commands.
    """
    log = logging.getLogger(__name__)
    try:
        client = escl.get_client(configdict=ctx.obj['configdict'])
    except Exception as exc:
        log.critical(f'Exception encountered: {exc}')
        raise FatalException from exc

    # Ask the cat API for the 'index' column only, as JSON rows
    rows = client.cat.indices(index=search_pattern, h='index', format='json')
    indices = sorted(row['index'] for row in rows)

    # Styling shared by every heading fragment
    heading = {'overline': True, 'underline': True, 'bold': True}

    # Search Pattern
    click.secho('\nSearch Pattern', nl=False, **heading)
    click.secho(f': {search_pattern}', bold=True)

    # Indices Found
    if len(indices) == 1:
        # A single match is shown on the heading line itself
        click.secho('\nIndex Found', nl=False, **heading)
        click.secho(f': {indices[0]}', bold=True)
        return

    click.secho(f'\n{len(indices)} ', nl=False, **heading)
    click.secho('Indices Found', nl=False, **heading)
    click.secho(': ')
    for name in indices:
        click.secho(name)