Coverage for src/es_fieldusage/commands.py: 42%

192 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-03-26 18:22 -0600

1"""Sub-commands for Click CLI""" 

2 

3# pylint: disable=R0913,R0914,R0917 

4import typing as t 

5import os 

6from datetime import datetime, timezone 

7import json 

8import logging 

9from pathlib import Path 

10import click 

11from es_client.helpers import config as escl 

12from es_client.helpers.utils import option_wrapper 

13from es_fieldusage.defaults import OPTS, FILEPATH_OVERRIDE, EPILOG 

14from es_fieldusage.exceptions import FatalException 

15from es_fieldusage.helpers.utils import output_report 

16from es_fieldusage.main import FieldUsage 

17 

# Flag-name prefixes passed to ``cli_opts`` as ``onoff`` for the paired boolean
# options (the commands below are invoked with e.g. --show-unaccessed/--hide-report)
SHW = {'on': 'show-', 'off': 'hide-'}

# Passed to ``cli_opts`` as ``override`` to make an option default to True
TRU = {'default': True}

# Result of es_client's option_wrapper(); applied below as ``@WRP(...)`` around the
# values returned by ``cli_opts`` to decorate each command with its options
WRP = option_wrapper()

21 

22 

def get_per_index(field_usage: FieldUsage, per_index: bool) -> t.Dict[str, t.Any]:
    """
    Build the data set that the reporting commands iterate over

    When ``per_index`` is falsy, the ``accessed`` and ``unaccessed`` entries of
    ``field_usage.report`` are nested under the single key ``all_indices``.
    Otherwise ``field_usage.per_index_report`` is returned unchanged.

    :raises FatalException: if reading ``per_index_report`` raises any exception
    """
    logger = logging.getLogger(__name__)
    if not per_index:
        # Aggregate view: one pseudo-index holding both halves of the report
        combined = {
            'accessed': field_usage.report['accessed'],
            'unaccessed': field_usage.report['unaccessed'],
        }
        return {'all_indices': combined}
    try:
        return field_usage.per_index_report
    except Exception as exc:
        # Log the cause, then surface it to the CLI as a fatal error
        logger.critical(f'Unable to get per_index_report data: {exc}')
        raise FatalException from exc

40 

41 

def format_delimiter(value: str) -> str:
    """
    Return ``value`` padded for display

    A colon gets a trailing space, an equals sign gets a space on both sides, and
    any other value is returned unchanged.
    """
    padded = {':': ': ', '=': ' = '}
    return padded.get(value, value)

52 

53 

def header_msg(msg: str, show: bool) -> str:
    """Return ``msg`` when ``show`` is truthy, otherwise an empty string"""
    return msg if show else ''

59 

60 

def is_docker() -> bool:
    """
    Report whether the process appears to be running inside a Docker container

    :rtype: bool
    :returns: ``True`` if ``/.dockerenv`` is a file, or if ``/proc/self/cgroup`` is
        a file whose text contains ``docker``; ``False`` otherwise
    """
    if Path("/.dockerenv").is_file():
        return True
    cgroup_file = Path("/proc/self/cgroup")
    if not cgroup_file.is_file():
        return False
    return "docker" in cgroup_file.read_text(encoding="utf8")

72 

73 

def printout(data: t.Dict[str, t.Any], show_counts: bool, raw_delimiter: str) -> None:
    """Write every line produced by ``output_generator`` for ``data`` to stdout"""
    lines = output_generator(data, show_counts, raw_delimiter)
    for text in lines:
        # Each generated line already ends in a newline, so suppress click's own
        click.secho(text, nl=False)

79 

80 

def output_generator(
    data: t.Dict[str, t.Any], show_counts: bool, raw_delimiter: str
) -> t.Generator[str, None, None]:
    """
    Yield one newline-terminated line per item in ``data``

    With ``show_counts`` set, each line is the key, the delimiter as formatted by
    ``format_delimiter``, and the value. Otherwise each line is the key alone.
    """
    separator = format_delimiter(raw_delimiter)
    for name, count in data.items():
        text = f'{name}{separator}{count}' if show_counts else f'{name}'
        # The newline is part of the yielded string so the lines can be handed
        # directly to a file descriptor's writelines()
        yield f'{text}\n'

95 

96 

def override_filepath() -> t.Dict[str, str]:
    """
    Return the ``filepath`` option override to use for this environment

    Inside Docker (per ``is_docker``) the default becomes ``FILEPATH_OVERRIDE``;
    anywhere else an empty dictionary is returned, i.e. nothing is overridden.
    """
    return {'default': FILEPATH_OVERRIDE} if is_docker() else {}

102 

103 

@click.command(epilog=EPILOG)
@WRP(*escl.cli_opts('report', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('headers', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('accessed', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('unaccessed', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('counts', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('delimiter', settings=OPTS))
@click.argument('search_pattern', type=str, nargs=1)
@click.pass_context
def stdout(
    ctx: click.Context,
    show_report: bool,
    show_headers: bool,
    show_accessed: bool,
    show_unaccessed: bool,
    show_counts: bool,
    delimiter: str,
    search_pattern: str,
) -> None:
    """
    Display field usage information on the console for SEARCH_PATTERN

    $ es-fieldusage stdout [OPTIONS] SEARCH_PATTERN

    This is powerful if you want to pipe the output through grep for only certain
    fields or patterns:

    $ es-fieldusage stdout --hide-report --hide-headers --show-unaccessed 'index-*' \
    | grep process
    """
    # The docstring above doubles as the command's help text, so it is kept as-is
    logger = logging.getLogger(__name__)
    try:
        field_usage = FieldUsage(ctx.obj['configdict'], search_pattern)
    except Exception as exc:
        logger.critical(f'Exception encountered: {exc}')
        raise FatalException from exc
    if show_report:
        output_report(search_pattern, field_usage.report)

    # (enabled, report key, header text) for each section, in display order
    sections = (
        (show_accessed, 'accessed', '\nAccessed Fields (in descending frequency):'),
        (show_unaccessed, 'unaccessed', '\nUnaccessed Fields'),
    )
    for enabled, key, title in sections:
        if not enabled:
            continue
        # With headers hidden the header text is empty, but the call is still made
        heading = header_msg(title, show_headers)
        click.secho(heading, overline=show_headers, underline=show_headers, bold=True)
        printout(field_usage.report[key], show_counts, delimiter)

150 

151 

@click.command(epilog=EPILOG)
@WRP(*escl.cli_opts('report', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('accessed', settings=OPTS, onoff=SHW, override=TRU))
@WRP(*escl.cli_opts('unaccessed', settings=OPTS, onoff=SHW, override=TRU))
@WRP(*escl.cli_opts('counts', settings=OPTS, onoff=SHW, override=TRU))
@WRP(*escl.cli_opts('index', settings=OPTS, onoff={'on': 'per-', 'off': 'not-per-'}))
@WRP(*escl.cli_opts('filepath', settings=OPTS, override=override_filepath()))
@WRP(*escl.cli_opts('prefix', settings=OPTS))
@WRP(*escl.cli_opts('suffix', settings=OPTS))
@WRP(*escl.cli_opts('delimiter', settings=OPTS))
@click.argument('search_pattern', type=str, nargs=1)
@click.pass_context
def file(
    ctx: click.Context,
    show_report: bool,
    show_accessed: bool,
    show_unaccessed: bool,
    show_counts: bool,
    per_index: bool,
    filepath: str,
    prefix: str,
    suffix: str,
    delimiter: str,
    search_pattern: str,
) -> None:
    """
    Write field usage information to file for SEARCH_PATTERN

    $ es_fieldusage file [OPTIONS] SEARCH_PATTERN

    When writing to file, the filename will be {prefix}-{INDEXNAME}.{suffix}
    where INDEXNAME will be the name of the index if the --per-index option is
    used, or 'all_indices' if not.

    This allows you to write to one file per index automatically, should that
    be your desire.
    """
    # The docstring above doubles as the command's help text, so it is kept as-is
    logger = logging.getLogger(__name__)
    try:
        field_usage = FieldUsage(ctx.obj['configdict'], search_pattern)
    except Exception as exc:
        logger.critical(f'Exception encountered: {exc}')
        raise FatalException from exc
    if show_report:
        output_report(search_pattern, field_usage.report)
        click.secho()

    all_data = get_per_index(field_usage, per_index)

    # The sections the user asked for, always in accessed-then-unaccessed order
    wanted = [
        key
        for key, enabled in (
            ('accessed', show_accessed),
            ('unaccessed', show_unaccessed),
        )
        if enabled
    ]
    as_json = suffix == 'json'

    files_written = []
    for idx, idx_data in all_data.items():
        fname = f'{prefix}-{idx}.{suffix}'
        filename = os.path.join(filepath, fname)

        # Mode 'w' truncates any earlier file, so stale data is never appended to
        # and no separate exists()/remove() step is needed. Opening the file
        # unconditionally also guarantees that every name reported below really
        # exists, even when both sections are hidden.
        with open(filename, 'w', encoding='utf-8') as fdesc:
            if as_json:
                # JSON output is a single object merging the selected sections
                output = {}
                for key in wanted:
                    output.update(idx_data[key])
                json.dump(output, fdesc, indent=2)
                fdesc.write('\n')
            else:
                # Text output is one line per field, section after section
                for key in wanted:
                    fdesc.writelines(
                        output_generator(idx_data[key], show_counts, delimiter)
                    )
        files_written.append(fname)

    click.secho('Number of files written: ', nl=False)
    click.secho(len(files_written), bold=True)
    click.secho('Filenames: ', nl=False)
    if len(files_written) > 3:
        # Keep the summary short when many per-index files were produced
        click.secho(files_written[0:3], bold=True, nl=False)
        click.secho(' ... (too many to show)')
    else:
        click.secho(files_written, bold=True)

240 

241 

@click.command(epilog=EPILOG)
@WRP(*escl.cli_opts('report', settings=OPTS, onoff=SHW))
@WRP(*escl.cli_opts('accessed', settings=OPTS, onoff=SHW, override=TRU))
@WRP(*escl.cli_opts('unaccessed', settings=OPTS, onoff=SHW, override=TRU))
@WRP(*escl.cli_opts('index', settings=OPTS, onoff={'on': 'per-', 'off': 'not-per-'}))
@WRP(*escl.cli_opts('indexname', settings=OPTS))
@click.argument('search_pattern', type=str, nargs=1)
@click.pass_context
def index(
    ctx: click.Context,
    show_report: bool,
    show_accessed: bool,
    show_unaccessed: bool,
    per_index: bool,
    indexname: str,
    search_pattern: str,
) -> None:
    """
    Write field usage information to file for SEARCH_PATTERN

    $ es_fieldusage index [OPTIONS] SEARCH_PATTERN

    This will write a document per fieldname per index found in SEARCH_PATTERN
    to INDEXNAME, where the JSON structure is:

    {
        "index": SOURCEINDEXNAME,
        "field": {
            "name": "FIELDNAME",
            "count": COUNT
        }
    }
    """
    # The docstring above doubles as the command's help text, so it is kept as-is
    logger = logging.getLogger(__name__)
    logger.debug(f'indexname = {indexname}')
    # strftime always yields whole seconds. Slicing isoformat() at the first '.'
    # breaks when microsecond == 0, because isoformat() then omits the fraction
    # and the '+00:00' offset would be left in front of the '.000Z' suffix.
    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.000Z')
    try:
        field_usage = FieldUsage(ctx.obj['configdict'], search_pattern)
    except Exception as exc:
        logger.critical(f'Exception encountered: {exc}')
        raise FatalException from exc
    # client = field_usage.client
    if show_report:
        output_report(search_pattern, field_usage.report)
        click.secho()

    all_data = get_per_index(field_usage, per_index)

    # The sections the user asked for, always in accessed-then-unaccessed order
    wanted = [
        key
        for key, enabled in (
            ('accessed', show_accessed),
            ('unaccessed', show_unaccessed),
        )
        if enabled
    ]
    # One document per field, per selected section, per source index
    output = [
        {
            '@timestamp': timestamp,
            'index': idx,
            'field': {'name': fieldname, 'count': value},
        }
        for idx, idx_data in all_data.items()
        for key in wanted
        for fieldname, value in idx_data[key].items()
    ]

    # TESTING
    # The documents are currently dumped to a scratch file named 'testing' in the
    # working directory. Mode 'w' truncates a previous run's file, so old data is
    # never appended to.
    filename = os.path.join(os.getcwd(), 'testing')
    with open(filename, 'w', encoding='utf-8') as fdesc:
        json.dump(output, fdesc, indent=2)
        fdesc.write('\n')
    # END TESTING

319 # END TESTING 

320 

321 

@click.command(epilog=EPILOG)
@click.argument('search_pattern', type=str, nargs=1)
@click.pass_context
def show_indices(ctx: click.Context, search_pattern: str) -> None:
    """
    Show indices on the console matching SEARCH_PATTERN

    $ es-fieldusage show_indices SEARCH_PATTERN

    This is included as a way to ensure you are seeing the indices you expect
    before using the file or stdout commands.
    """
    # The docstring above doubles as the command's help text, so it is kept as-is
    logger = logging.getLogger(__name__)
    try:
        client = escl.get_client(configdict=ctx.obj['configdict'])
    except Exception as exc:
        logger.critical(f'Exception encountered: {exc}')
        raise FatalException from exc
    cat = client.cat.indices(index=search_pattern, h='index', format='json')
    names = sorted(item['index'] for item in cat)

    # Styling shared by every heading fragment printed below
    heading = {'overline': True, 'underline': True, 'bold': True}

    click.secho('\nSearch Pattern', nl=False, **heading)
    click.secho(f': {search_pattern}', bold=True)

    if len(names) == 1:
        # A single match is shown inline, with a singular heading
        click.secho('\nIndex Found', nl=False, **heading)
        click.secho(f': {names[0]}', bold=True)
        return

    # Zero or several matches: counted heading, then one index name per line
    click.secho(f'\n{len(names)} ', nl=False, **heading)
    click.secho('Indices Found', nl=False, **heading)
    click.secho(': ')
    for name in names:
        click.secho(name)