Coverage for src/es_fieldusage/helpers/utils.py: 98%

73 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-03-26 17:59 -0600

1"""Utility helper functions""" 

2 

3import typing as t 

4from collections import defaultdict 

5from functools import reduce 

6from itertools import chain 

7from operator import getitem, itemgetter 

8import click 

9from es_fieldusage.exceptions import ConfigurationException 

10 

11 

def convert_mapping(
    data: t.Dict[str, t.Any], new_dict: t.Optional[t.Dict[str, t.Any]] = None
) -> t.Dict[str, t.Any]:
    """
    Convert an Elasticsearch mapping into a dictionary more closely approximating
    the one coming from the field usage API.

    Every dict value in ``data`` is descended into recursively. When such a value
    contains a ``properties`` key, the ``properties`` sub-dict is descended into
    instead, so that level does not appear in the result (it is not in the
    field_usage stats paths). Every non-dict value is replaced with 0 (counts
    from field usage are merged in later).

    Note that a ``properties`` key at the top level of ``data`` itself is kept;
    only ``properties`` keys nested inside a value are stripped.

    :param data: The mapping dict (or sub-dict) to convert. It is not modified.
    :param new_dict: Optional scratch dict. Each key of ``data`` is recorded in
        it with the (``properties``-unwrapped) value that was processed. It is
        mutated in place; recursive calls always receive a fresh empty dict.

    :returns: A new dict mirroring the nesting of ``data``, minus the stripped
        ``properties`` levels, with 0 at the end of every path.
    """
    scratch = {} if new_dict is None else new_dict
    converted: t.Dict[str, t.Any] = {}
    for key, value in data.items():
        scratch[key] = value
        if not isinstance(value, dict):
            # End of a path: placeholder count
            converted[key] = 0
            continue
        if 'properties' in value:
            # Skip over the "properties" level entirely
            scratch[key] = value['properties']
        converted[key] = convert_mapping(scratch[key], new_dict={})
    return converted

36 

37 

def detuple(path: t.List[t.Any]) -> t.List[t.Any]:
    """
    If we used a tuple to access a dict path, we fix it to be a list again here

    :param path: A list of path elements, or a single-element list wrapping a
        tuple of path elements.

    :returns: A new list built from the wrapped tuple when ``path`` is exactly
        one tuple; otherwise ``path`` itself, unchanged.
    """
    if len(path) == 1 and isinstance(path[0], tuple):
        return list(path[0])
    return path

43 

44 

def get_value_from_path(data: t.Dict[str, t.Any], path: t.List[t.Any]) -> t.Any:
    """
    Return value from dict ``data``. Recreate all keys from list ``path``

    Each element of ``path`` is applied in turn as a subscript, starting from
    ``data``. An empty ``path`` returns ``data`` itself. A missing key raises
    whatever the subscript operation raises (``KeyError`` for dicts).

    :param data: The (nested) dict to read from.
    :param path: The sequence of keys leading to the wanted value.

    :returns: The value found at the end of ``path``.
    """
    return reduce(getitem, path, data)

50 

51 

def iterate_paths(
    data: t.Dict[str, t.Any], path: t.Optional[t.List[str]] = None
) -> t.Generator[t.List[str], None, None]:
    """
    Recursively extract all paths from a dictionary

    A path is the list of keys leading to a non-dict value. Dict values are
    descended into rather than yielded, so an empty nested dict contributes no
    path at all.

    :param data: The (nested) dict to walk.
    :param path: The keys already traversed to reach ``data``. Used by the
        recursion; it is never mutated.

    :returns: A generator yielding one new list of keys per non-dict value, in
        dict iteration order.
    """
    prefix = [] if path is None else path
    for key, value in data.items():
        # Build a new list each time so yielded paths never share state
        newpath = prefix + [key]
        if isinstance(value, dict):
            yield from iterate_paths(value, newpath)
        else:
            yield newpath

65 

66 

def output_report(search_pattern: str, report: t.Dict[str, t.Any]) -> None:
    """
    Output summary report data to command-line/console

    :param search_pattern: The index search pattern the report was built for.
    :param report: The report data. This function reads the keys ``indices``
        (a list, or any other single value), ``field_count``, ``accessed`` and
        ``unaccessed`` (the latter two must support ``len()``).
    """
    indices = report['indices']
    # Title
    click.secho('\nSummary Report', overline=True, underline=True, bold=True)
    click.secho('\nSearch Pattern: ', nl=False)
    # Search Pattern
    click.secho(search_pattern, bold=True)
    # Indices Found
    if not isinstance(indices, list):
        click.secho('Index Found: ', nl=False)
        click.secho(f'{indices}', bold=True)
    else:
        click.secho(f'{len(indices)} ', bold=True, nl=False)
        click.secho('Indices Found: ', nl=False)
        # Only list the index names when there are few enough to be readable
        if len(indices) > 3:
            click.secho('(data too big)', bold=True)
        else:
            click.secho(f'{indices}', bold=True)
    # Total Fields
    click.secho('Total Fields Found: ', nl=False)
    click.secho(report['field_count'], bold=True)
    # Accessed Fields
    click.secho('Accessed Fields: ', nl=False)
    click.secho(len(report['accessed']), bold=True)
    # Unaccessed Fields
    click.secho('Unaccessed Fields: ', nl=False)
    click.secho(len(report['unaccessed']), bold=True)

94 

95 

def override_settings(
    data: t.Dict[str, t.Any], new_data: t.Dict[str, t.Any]
) -> t.Dict[str, t.Any]:
    """
    Override keys in data with values matching in new_data

    Only keys already present in ``data`` are overridden; keys that exist only
    in ``new_data`` are ignored. ``data`` is modified in place.

    :param data: The settings dict to update.
    :param new_data: The dict of override values.

    :raises ConfigurationException: If ``new_data`` is not a dict.

    :returns: The same ``data`` object, after the overrides are applied.
    """
    if not isinstance(new_data, dict):
        raise ConfigurationException('new_data must be of type dict')
    for key, value in new_data.items():
        if key in data:
            data[key] = value
    return data

106 

107 

def passthrough(func: t.Callable) -> t.Callable:
    """
    Wrapper to make it easy to store click configuration elsewhere

    :param func: The callable to wrap.

    :returns: A two-argument callable taking a sequence of positional arguments
        and a mapping of keyword arguments, which it unpacks into a call to
        ``func`` and returns the result.
    """

    def _unpack(a: t.Sequence[t.Any], k: t.Mapping[str, t.Any]) -> t.Any:
        """Call ``func`` with ``a`` as positional and ``k`` as keyword arguments"""
        return func(*a, **k)

    return _unpack

111 

112 

def sort_by_name(data: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
    """
    Sort dictionary by key alphabetically

    :param data: The dict to sort. It is not modified.

    :returns: A new dict with the same items, inserted in ascending key order.
    """
    return dict(sorted(data.items(), key=itemgetter(0)))

116 

117 

def sort_by_value(data: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
    """
    Sort dictionary by root key value, descending

    Items with equal values keep their original relative order.

    :param data: The dict to sort. Its values must be mutually comparable. It
        is not modified.

    :returns: A new dict with the same items, inserted from highest value to
        lowest.
    """
    return dict(sorted(data.items(), key=itemgetter(1), reverse=True))

121 

122 

def sum_dict_values(data: t.Dict[str, t.Dict[str, t.Any]]) -> t.Dict[str, int]:
    """
    Sum the values of data dict(s) into a new dict

    The outer keys of ``data`` are ignored. For every inner dict, each value is
    converted with ``int()`` and added to the running total for its key.

    :param data: A dict whose values are dicts of ``int()``-convertible values.

    :returns: A plain dict of per-key totals, sorted by key name.
    """
    # Sets up totals to have every dictionary key be an integer by default
    totals: t.Dict[str, int] = defaultdict(int)
    for key, value in chain.from_iterable(d.items() for d in data.values()):
        totals[key] += int(value)
    return sort_by_name(dict(totals))