Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/es-testbed/lib/python3.12/site-packages/es_testbed/helpers/utils.py: 23%

109 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-08-30 20:56 -0600

1"""Utility helper functions""" 

2 

3import sys 

4import typing as t 

5import random 

6import string 

7import logging 

8from datetime import datetime, timezone 

9from pathlib import Path 

10from pprint import pformat 

11from shutil import rmtree 

12from tempfile import mkdtemp 

13from git import Repo 

14from ..defaults import ilm_force_merge, ilm_phase, TIER 

15from ..exceptions import TestbedMisconfig 

16 

17logger = logging.getLogger(__name__) 

18 

19 

def build_ilm_phase(
    phase: str,
    actions: t.Union[t.Dict, None] = None,
    repo: t.Union[str, None] = None,
    fm: bool = False,
) -> t.Dict:
    """Build the ILM policy entry for a single phase.

    :param phase: Name of the phase to build. It is passed to ``ilm_phase`` and
        used as the key into the structure that call returns.
    :param actions: Optional extra actions merged into the phase's ``actions``.
    :param repo: Snapshot repository name. Required when ``phase`` is ``cold`` or
        ``frozen``.
    :param fm: Value stored as ``force_merge_index`` in the
        ``searchable_snapshot`` action of a ``cold`` or ``frozen`` phase.

    :returns: The phase structure from ``ilm_phase`` with any additions applied.
    :raises TestbedMisconfig: If ``phase`` is ``cold`` or ``frozen`` and no
        ``repo`` was provided.
    """
    step = ilm_phase(phase)
    if phase in ('cold', 'frozen'):
        # These two phases get a searchable_snapshot action, which needs a repo.
        if not repo:
            raise TestbedMisconfig(
                f'Unable to build {phase} ILM phase. Value for repository not provided'
            )
        step[phase]['actions']['searchable_snapshot'] = {
            'snapshot_repository': repo,
            'force_merge_index': fm,
        }
    if actions:
        # Caller-supplied actions are applied last, so they win on key conflicts.
        step[phase]['actions'].update(actions)
    return step

43 

44 

def build_ilm_policy(
    phases: t.Union[t.List[str], None] = None,
    forcemerge: bool = False,
    max_num_segments: int = 1,
    readonly: t.Union[str, None] = None,
    repository: t.Union[str, None] = None,
) -> t.Dict:
    """Build a full ILM policy from the provided phases.

    :param phases: Phase names to include. Defaults to ``['hot', 'delete']`` when
        empty or ``None``.
    :param forcemerge: When true, the force merge action is added to the ``hot``
        phase, and the same flag is passed to each phase builder as ``fm``.
    :param max_num_segments: Passed to ``ilm_force_merge`` when ``forcemerge`` is
        true.
    :param readonly: Name of the phase that should receive a ``readonly`` action.
    :param repository: Snapshot repository name, required when ``cold`` or
        ``frozen`` is among the phases.

    :returns: ``{'phases': {...}}`` with one entry per requested phase.
    :raises TestbedMisconfig: If ``cold`` or ``frozen`` is requested without a
        repository, or if ``forcemerge`` is requested without a ``hot`` phase.
    """
    if not phases:
        phases = ['hot', 'delete']
    if ('cold' in phases or 'frozen' in phases) and not repository:
        raise TestbedMisconfig('Cannot build cold or frozen phase without repository')
    if forcemerge and 'hot' not in phases:
        # The force merge action is attached to the hot phase below. Fail with a
        # descriptive error here rather than a bare KeyError('hot') later.
        raise TestbedMisconfig(
            'Cannot add forcemerge to an ILM policy without a hot phase'
        )
    retval = {}
    for name in phases:
        actions = {"readonly": {}} if readonly == name else None
        retval.update(
            build_ilm_phase(name, repo=repository, fm=forcemerge, actions=actions)
        )
    if forcemerge:
        retval['hot']['actions'].update(
            ilm_force_merge(max_num_segments=max_num_segments)
        )
    return {'phases': retval}

72 

73 

74# def doc_gen( 

75# count: int = 10, start_at: int = 0, match: bool = True 

76# ) -> t.Generator[t.Dict, None, None]: 

77# """Create this doc for each count""" 

78# keys = ['message', 'nested', 'deep'] 

79# # Start with an empty map 

80# matchmap = {} 

81# # Iterate over each key 

82# for key in keys: 

83# # If match is True 

84# if match: 

85# # Set matchmap[key] to key 

86# matchmap[key] = key 

87# else: 

88# # Otherwise matchmap[key] will have a random string value 

89# matchmap[key] = randomstr() 

90 

91# # This is where count and start_at matter 

92# for num in range(start_at, start_at + count): 

93# yield { 

94# '@timestamp': iso8601_now(), 

95# 'message': f'{matchmap["message"]}{num}', # message# or randomstr# 

96# 'number': ( 

97# num if match else random.randint(1001, 32767) 

98# ), # value of num or random int 

99# 'nested': {'key': f'{matchmap["nested"]}{num}'}, # nested# 

100# 'deep': {'l1': {'l2': {'l3': f'{matchmap["deep"]}{num}'}}}, # deep# 

101# } 

102 

103 

104# def getlogger(name: str) -> logging.getLogger: 

105# """Return a named logger""" 

106# return logging.getLogger(name) 

107 

108 

def get_routing(tier='hot') -> t.Dict:
    """Return the routing allocation tier preference setting for ``tier``.

    :param tier: Key looked up in ``TIER``.

    :returns: A single-item dict keyed by
        ``index.routing.allocation.include._tier_preference``. The value is the
        tier's ``pref`` entry, or ``data_content`` if the lookup raises
        ``KeyError``.
    """
    setting = 'index.routing.allocation.include._tier_preference'
    try:
        return {setting: TIER[tier]['pref']}
    except KeyError:
        # Fallback value
        return {setting: 'data_content'}

117 

118 

def iso8601_now() -> str:
    """
    :returns: An ISO8601 timestamp for the current UTC time, using ``Z`` in place
        of a ``+00:00`` offset
    :rtype: str
    """
    # A timezone-aware "now" is used here, so isoformat() appends the offset:
    #
    #   datetime.now(timezone.utc).isoformat() -> 2024-04-16T16:00:00+00:00
    #
    # Elasticsearch uses the shorter Zulu notation, so a +00:00 offset is swapped
    # for a trailing Z. UTC is requested explicitly, so the offset should always
    # be +00:00; the other branches are defensive.
    stamp = datetime.now(timezone.utc).isoformat()
    base, plus, offset = stamp.partition('+')
    if not plus:
        # No offset at all: keep an existing trailing Z, otherwise append one
        return base if base[-1] == 'Z' else f'{base}Z'
    if offset == '00:00':
        return f'{base}Z'
    # Fallback publishes the +TZ, whatever that was
    return f'{base}+{offset}'

150 

151 

def mounted_name(index: str, tier: str):
    """Return the ``renamed_index`` value used when mounting a searchable snapshot.

    :param index: The original index name.
    :param tier: Key looked up in ``TIER`` to obtain the ``prefix``.

    :returns: ``<prefix>-<index>``
    """
    prefix = TIER[tier]["prefix"]
    return f'{prefix}-{index}'

155 

156 

def prettystr(*args, **kwargs) -> str:
    """
    A (nearly) straight up wrapper for pprint.pformat, except that we provide our own
    default values for 'indent' (2) and 'sort_dicts' (False). Primarily for debug
    logging and showing more readable dictionaries.

    'Return the formatted representation of object as a string. indent, width, depth,
    compact, sort_dicts and underscore_numbers are passed to the PrettyPrinter
    constructor as formatting parameters' (from pprint documentation).

    The keyword arg, ``underscore_numbers`` is only available in Python versions
    3.10 and up, so it is only passed along when running on such a version.

    Only the keyword args named above are forwarded to ``pformat``. Any other
    keyword arg is silently ignored.

    :returns: The formatted string, prefixed with a newline
    :rtype: str
    """
    defaults = {
        'indent': 2,
        'width': 80,
        'depth': None,
        'compact': False,
        'sort_dicts': False,
    }
    # Tuple comparison stays correct for any future major version, unlike
    # checking "major == 3 and minor >= 10".
    if sys.version_info >= (3, 10):
        # underscore_numbers only works in 3.10 and up
        defaults['underscore_numbers'] = False
    kw = {key: kwargs.get(key, default) for key, default in defaults.items()}

    return f"\n{pformat(*args, **kw)}"  # newline in front so it always starts clean

187 

188 

def process_preset(
    builtin: t.Union[str, None],
    path: t.Union[str, None],
    ref: t.Union[str, None],
    url: t.Union[str, None],
) -> t.Tuple:
    """Process the preset settings

    :param builtin: The name of a builtin preset. Overrides all other options.
    :param path: A relative or absolute file path. With a git preset it is
        relative to the root of the cloned repository.
    :param ref: A Git ref (e.g. 'main'). Only used by a git preset
    :param url: A Git repository URL. Only used by a git preset

    A git preset is attempted only when ``path``, ``ref`` and ``url`` are all
    not ``None``. Otherwise ``path`` is treated as a local directory.

    :returns: A tuple of ``(modpath, tmpdir)``. ``modpath`` is the module name to
        import, and ``tmpdir`` is the temporary clone directory for a git preset,
        otherwise ``None``.
    :raises ValueError: If no usable preset source was provided, or if the
        resulting path is not a directory.
    """
    if builtin:  # Overrides any other options
        return f'es_testbed.presets.{builtin}', None

    tmpdir = None
    trygit = False
    try:
        raise_on_none(path=path, ref=ref, url=url)
        trygit = True  # We have all 3 kwargs necessary for git
    except ValueError as resp:  # Not able to do a git preset
        logger.debug('Unable to import a git-based preset: %s', resp)

    if trygit:  # Trying a git import
        tmpdir = mkdtemp()
        try:
            Repo.clone_from(url, tmpdir, branch=ref, depth=1)
        except Exception as err:
            logger.error('Git clone failed: %s', err)
            rmtree(tmpdir)  # Clean up after failed attempt
            raise
        # The preset lives inside the fresh checkout. This must not be replaced
        # by a bare Path(path), which would point at the working directory.
        filepath = Path(tmpdir) / path
    elif path:
        filepath = Path(path)
    else:
        raise ValueError(
            'Unable to process preset: no "builtin" name and no "path" provided'
        )

    resolved = filepath.resolve()
    if not resolved.is_dir():
        if tmpdir:
            rmtree(tmpdir)  # Do not leak the clone when the path is unusable
        raise ValueError(f'The provided path "{path}" is not a directory')
    # Take both the name and the parent from the resolved path so they agree,
    # even for inputs such as '.' or a symlink.
    modpath = resolved.name  # The final dirname
    parent = resolved.parent  # Up one level
    # We now make the parent path part of the sys.path. It must be a str because
    # non-string entries are ignored during import.
    sys.path.insert(0, str(parent))  # This should persist beyond this module
    return modpath, tmpdir

232 

233 

def python_version() -> t.Tuple:
    """
    Return the first three fields of ``sys.version_info`` for the running Python
    as a tuple, e.g. 3.12.2 would be (3, 12, 2)
    """
    major, minor, micro = sys.version_info[:3]
    return (major, minor, micro)

240 

241 

def raise_on_none(**kwargs):
    """Raise if any kwargs have a None value

    :raises ValueError: Naming the first keyword argument (in the order passed)
        whose value is ``None``.
    """
    offender = next((name for name, val in kwargs.items() if val is None), None)
    if offender is not None:
        raise ValueError(f'kwarg "{offender}" cannot have a None value')

247 

248 

def randomstr(length: int = 16, lowercase: bool = False) -> str:
    """Generate a random string of letters and digits

    :param length: Number of characters to generate.
    :param lowercase: Draw letters from lowercase ASCII instead of uppercase.

    :returns: A string of ``length`` characters chosen with ``random.choices``
    """
    alphabet = string.ascii_lowercase if lowercase else string.ascii_uppercase
    picks = random.choices(alphabet + string.digits, k=length)
    return ''.join(picks)

255 

256 

def storage_type(tier: str) -> t.Dict:
    """Return the storage type of a searchable snapshot by tier

    :param tier: Key looked up in ``TIER``.

    :returns: The ``storage`` entry of that tier's settings
    """
    tier_settings = TIER[tier]
    return tier_settings["storage"]