Coverage for /Users/buh/.pyenv/versions/3.12.9/envs/es-testbed/lib/python3.12/site-packages/es_testbed/helpers/utils.py: 94%

108 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-17 19:29 -0600

1"""Utility helper functions""" 

2 

3import sys 

4import typing as t 

5import random 

6import string 

7import logging 

8import datetime 

9from pathlib import Path 

10from pprint import pformat 

11from shutil import rmtree 

12from tempfile import mkdtemp 

13from git import Repo 

14from ..defaults import ilm_force_merge, ilm_phase, TIER 

15from ..exceptions import TestbedMisconfig 

16 

17logger = logging.getLogger(__name__) 

18 

19 

def build_ilm_phase(
    phase: str,
    actions: t.Union[t.Dict, None] = None,
    repo: t.Union[str, None] = None,
    fm: bool = False,
) -> t.Dict:
    """Assemble the ILM policy definition for a single phase.

    :param phase: Name of the ILM phase to build
    :param actions: Extra actions to merge into the phase's ``actions`` mapping
    :param repo: Snapshot repository name. Mandatory for ``cold`` and ``frozen``
    :param fm: Value stored as ``force_merge_index`` in the searchable snapshot
        action of a ``cold`` or ``frozen`` phase
    :returns: The mapping produced by ``ilm_phase(phase)`` with the additions
        described above applied
    :raises TestbedMisconfig: If ``phase`` is ``cold`` or ``frozen`` and no
        repository was supplied
    """
    policy = ilm_phase(phase)
    snapshot_backed = phase in ('cold', 'frozen')
    # Guard clause: searchable snapshot phases are meaningless without a repository
    if snapshot_backed and not repo:
        raise TestbedMisconfig(
            f'Unable to build {phase} ILM phase. Value for repository not provided'
        )
    if snapshot_backed:
        policy[phase]['actions']['searchable_snapshot'] = {
            'snapshot_repository': repo,
            'force_merge_index': fm,
        }
    if actions:
        policy[phase]['actions'].update(actions)
    return policy

43 

44 

def build_ilm_policy(
    phases: t.Union[list, None] = None,
    forcemerge: bool = False,
    max_num_segments: int = 1,
    readonly: t.Union[str, None] = None,
    repository: t.Union[str, None] = None,
) -> t.Dict:
    """
    Build a full ILM policy based on the provided phases.

    When ``forcemerge`` is requested, the force merge action is added to the
    ``hot`` phase.

    :param phases: Phase names to include. Defaults to ``['hot', 'delete']`` when
        empty or ``None``
    :param forcemerge: Whether to add a force merge action to the ``hot`` phase.
        Also passed to each phase builder as its ``fm`` value
    :param max_num_segments: Passed through to ``ilm_force_merge``
    :param readonly: Name of the phase that should receive a ``readonly`` action
    :param repository: Snapshot repository, required for ``cold``/``frozen``
    :returns: A mapping of the form ``{'phases': {...}}``
    :raises TestbedMisconfig: If ``cold`` or ``frozen`` is requested without a
        repository, or if ``forcemerge`` is requested but no ``hot`` phase was
        built
    """
    if not phases:
        phases = ['hot', 'delete']
    if ('cold' in phases or 'frozen' in phases) and not repository:
        raise TestbedMisconfig('Cannot build cold or frozen phase without repository')
    retval = {}
    for phase in phases:
        actions = {"readonly": {}} if readonly == phase else None
        # Do not rebind the loop variable; merge the built phase directly
        retval.update(
            build_ilm_phase(phase, repo=repository, fm=forcemerge, actions=actions)
        )
    if forcemerge:
        # The force merge action lives in the hot phase. Without one, fail with
        # the package's own exception instead of an opaque KeyError.
        if 'hot' not in retval:
            raise TestbedMisconfig(
                'Cannot add a forcemerge action without a hot phase'
            )
        retval['hot']['actions'].update(
            ilm_force_merge(max_num_segments=max_num_segments)
        )
    return {'phases': retval}

72 

73 

def get_routing(tier='hot') -> t.Dict:
    """Build the index setting that expresses a tier routing preference.

    :param tier: Key looked up in ``TIER``
    :returns: A single-entry mapping for
        ``index.routing.allocation.include._tier_preference``. The value is
        ``TIER[tier]['pref']``, or ``data_content`` when that lookup raises
        ``KeyError``
    """
    setting = 'index.routing.allocation.include._tier_preference'
    try:
        return {setting: TIER[tier]['pref']}
    except KeyError:
        # Fallback value
        return {setting: 'data_content'}

82 

83 

def iso8601_now() -> str:
    """
    Return the current UTC time as an ISO8601 string with a ``Z`` suffix.

    :returns: An ISO8601 timestamp based on now, e.g. ``2024-04-16T16:00:00Z``
        (fractional seconds are included when the microsecond value is non-zero)
    :rtype: str
    """
    # A timezone-aware "now" is used, so isoformat() appends the UTC offset:
    #
    #   datetime.now(timezone.utc).isoformat()  ->  2024-04-16T16:00:00+00:00
    #
    # Because timezone.utc is declared explicitly here, the offset is always
    # +00:00. It is swapped for Z, the shorter Zulu notation for UTC (which
    # Elasticsearch uses).
    stamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
    return stamp.replace('+00:00', 'Z')

114 

115 

def mounted_name(index: str, tier: str):
    """Compute the ``renamed_index`` value for a mounted searchable snapshot.

    :param index: The original index name
    :param tier: Key into ``TIER``; its ``prefix`` entry is prepended
    :returns: ``<prefix>-<index>``
    """
    prefix = TIER[tier]["prefix"]
    return f'{prefix}-{index}'

119 

120 

def prettystr(*args, **kwargs) -> str:
    """
    Format an object with pprint.pformat using this module's preferred defaults.

    This is a thin wrapper around ``pformat``: ``indent`` defaults to 2 and
    ``sort_dicts`` to False, which gives more readable dictionaries in debug
    logging. Only the keyword arguments named below are forwarded; any other
    keyword argument is not passed on.

    Recognized keyword arguments: ``indent``, ``width``, ``depth``, ``compact``,
    ``sort_dicts`` and, on Python 3.10 and up only, ``underscore_numbers``.

    :returns: The formatted text, prefixed with a newline so it always starts on
        a clean line
    """
    settings = {
        'indent': 2,
        'width': 80,
        'depth': None,
        'compact': False,
        'sort_dicts': False,
    }
    version = python_version()
    if version[0] == 3 and version[1] >= 10:
        # underscore_numbers only works in 3.10 and up
        settings['underscore_numbers'] = False
    # Caller-supplied values win over the defaults, key by key
    forwarded = {name: kwargs.get(name, fallback) for name, fallback in settings.items()}
    formatted = pformat(*args, **forwarded)
    return f"\n{formatted}"  # newline in front so it always starts clean

151 

152 

def process_preset(
    builtin: t.Union[str, None],
    path: t.Union[str, None],
    ref: t.Union[str, None],
    url: t.Union[str, None],
) -> t.Tuple:
    """Process the preset settings

    Three kinds of preset are handled, in this order of precedence:

    * builtin: ``builtin`` is set. Nothing is touched on disk.
    * git: ``path``, ``ref`` and ``url`` are all not ``None``. The repository is
      shallow-cloned into a temporary directory and ``path`` is taken relative
      to that clone.
    * path: anything else. ``path`` is used as given.

    For git and path presets, the parent of the preset directory is inserted at
    the front of ``sys.path``.

    :param builtin: The name of a builtin preset
    :param path: A relative or absolute file path. Used by presets `git` and `path`
    :param ref: A Git ref (e.g. 'main'). Only used by preset `git`
    :param url: A Git repository URL. Only used by preset `git`
    :returns: A ``(modpath, tmpdir)`` tuple. ``tmpdir`` is the clone directory
        for a git preset and ``None`` otherwise
    :raises ValueError: If the resulting path is not a directory
    :raises TypeError: From ``Path`` if a path preset is attempted with
        ``path=None``
    """
    if builtin:  # Overrides any other options
        return f'es_testbed.presets.{builtin}', None
    tmpdir = None
    try:
        raise_on_none(path=path, ref=ref, url=url)
        trygit = True  # We have all 3 kwargs necessary for git
    except ValueError as resp:  # Not able to do a git preset
        logger.debug(f'Unable to import a git-based preset: {resp}')
        trygit = False
    if trygit:  # Trying a git import
        tmpdir = mkdtemp()
        try:
            _ = Repo.clone_from(url, tmpdir, branch=ref, depth=1)
            filepath = Path(tmpdir) / path
        except Exception as err:
            logger.error(f'Git clone failed: {err}')
            rmtree(tmpdir)  # Clean up after failed attempt
            raise
    else:
        # Only fall back to the bare path when no clone was made, so the
        # location inside the clone is never overwritten.
        filepath = Path(path)
    resolved = filepath.resolve()
    if not resolved.is_dir():
        if tmpdir is not None:
            # The caller never receives tmpdir on failure, so remove it here
            rmtree(tmpdir, ignore_errors=True)
        raise ValueError(f'The provided path "{path}" is not a directory')
    modpath = resolved.name  # The final dirname
    # Take the parent of the *resolved* path so it matches modpath
    parent = resolved.parent  # Up one level
    # We now make the parent path part of the sys.path. It must be a string:
    # non-string entries in sys.path are ignored during import.
    sys.path.insert(0, str(parent))  # This should persist beyond this module
    return modpath, tmpdir

195 

196 

def python_version() -> t.Tuple:
    """
    Report the running interpreter's version as a plain 3-tuple.

    :returns: ``(major, minor, micro)``, e.g. 3.12.2 would be ``(3, 12, 2)``
    """
    info = sys.version_info
    return (info.major, info.minor, info.micro)

203 

204 

def raise_on_none(**kwargs):
    """Reject any keyword argument whose value is ``None``.

    :raises ValueError: Naming the first keyword argument (in the order given)
        found to be ``None``
    """
    # Keyword names are always strings, so None is a safe "nothing found" marker
    offender = next((name for name, val in kwargs.items() if val is None), None)
    if offender is not None:
        raise ValueError(f'kwarg "{offender}" cannot have a None value')

210 

211 

def randomstr(length: int = 16, lowercase: bool = False) -> str:
    """Produce a random string of ASCII letters and digits.

    :param length: Number of characters to generate
    :param lowercase: Use lowercase letters instead of uppercase ones
    :returns: ``length`` characters drawn with ``random.choices``
    """
    alphabet = string.ascii_lowercase if lowercase else string.ascii_uppercase
    picks = random.choices(alphabet + string.digits, k=length)
    return ''.join(picks)

218 

219 

def storage_type(tier: str) -> t.Dict:
    """Look up the searchable snapshot storage type for a tier.

    :param tier: Key into ``TIER``
    :returns: The ``storage`` entry of ``TIER[tier]``
    """
    tier_settings = TIER[tier]
    return tier_settings["storage"]