Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/es-testbed/lib/python3.12/site-packages/es_testbed/helpers/utils.py: 86%

102 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-24 22:41 -0600

1"""Utility helper functions""" 

2import typing as t 

3import random 

4import string 

5import logging 

6from copy import deepcopy 

7from datetime import datetime, timezone 

8from es_testbed.defaults import ilm_force_merge, ilm_phase, MAPPING, TIER 

9from es_testbed.exceptions import TestbedMisconfig 

10 

def build_ilm_phase(tier, actions=None, repository=None):
    """
    Build a single ILM policy phase for the named tier

    :param tier: Name of the phase to build. It is passed to ``ilm_phase`` and is the key
        under which the phase's ``actions`` are stored.
    :param actions: Optional mapping of additional actions merged into the phase's actions
    :param repository: Snapshot repository name. Required for the ``cold`` and ``frozen``
        tiers, where it is used for the ``searchable_snapshot`` action.
    :returns: The phase dictionary from ``ilm_phase`` with any additions applied
    :raises TestbedMisconfig: If tier is ``cold`` or ``frozen`` and repository is not provided
    """
    phase = ilm_phase(tier)
    needs_snapshot = tier in ('cold', 'frozen')
    if needs_snapshot and not repository:
        msg = f'Unable to build ILM phase for {tier} tier. Value for repository not provided'
        raise TestbedMisconfig(msg)
    if needs_snapshot:
        phase[tier]['actions']['searchable_snapshot'] = {'snapshot_repository': repository}
    if actions:
        phase[tier]['actions'].update(actions)
    return phase

23 

def build_ilm_policy(
        tiers: list = None,
        forcemerge: bool = False,
        max_num_segments: int = 1,
        repository: str = None
    ):
    """
    Build a full ILM policy based on the provided tiers.

    When forcemerge is requested, the force merge action is added to the ``hot`` phase.

    :param tiers: Names of the phases to include. Defaults to ``['hot', 'delete']`` when
        empty or not provided.
    :param forcemerge: Whether to add a force merge action to the ``hot`` phase
    :param max_num_segments: Passed to ``ilm_force_merge`` when forcemerge is True
    :param repository: Snapshot repository name. Required if tiers contains ``cold`` or
        ``frozen``.
    :returns: A dictionary of the form ``{'phases': {...}}``
    :raises TestbedMisconfig: If a cold or frozen tier is requested without a repository,
        or if forcemerge is requested but ``hot`` is not one of the tiers
    """
    if not tiers:
        tiers = ['hot', 'delete']
    phases = {}
    if ('cold' in tiers or 'frozen' in tiers) and not repository:
        raise TestbedMisconfig('Cannot build cold or frozen phase without repository')
    for tier in tiers:
        phases.update(build_ilm_phase(tier, repository=repository))
    if forcemerge:
        # The force merge action is always attached to the hot phase. Report a missing hot
        # phase as a misconfiguration rather than failing with a bare KeyError.
        if 'hot' not in phases:
            raise TestbedMisconfig('Cannot add forcemerge without a hot tier')
        phases['hot']['actions'].update(ilm_force_merge(max_num_segments=max_num_segments))
    return {'phases': phases}

44 

def doc_gen(count=10, start_at=0, match=True):
    """
    Generate ``count`` documents, numbered consecutively from ``start_at``

    :param count: How many documents to yield
    :param start_at: The number used for the first document
    :param match: If True, the string fields are built from their own field name plus the
        document number and ``number`` is the document number. If False, each string field
        is built from a random string plus the document number and ``number`` is a random
        integer.
    :returns: A generator of document dictionaries
    """
    # One label per templated field: the field name itself when match is True, otherwise
    # a separate random string for each field
    labels = {
        field: (field if match else randomstr())
        for field in ('message', 'nested', 'deep')
    }

    # This is where count and start_at matter
    stop = start_at + count
    for num in range(start_at, stop):
        yield {
            '@timestamp': iso8601_now(),
            # message# or randomstr#
            'message': f'{labels["message"]}{num}',
            # value of num or random int
            'number': num if match else random.randint(1001, 32767),
            # nested# or randomstr#
            'nested': {'key': f'{labels["nested"]}{num}'},
            # deep# or randomstr#
            'deep': {'l1': {'l2': {'l3': f'{labels["deep"]}{num}'}}},
        }

77 

def ds_action_generator(data_stream: str, index: str, action: str = None):
    """
    Generate a single add or remove backing index action for a data_stream

    :param data_stream: Value for the ``data_stream`` key of the action
    :param index: Value for the ``index`` key of the action
    :param action: Either ``add`` or ``remove``
    :returns: A dictionary with the single key ``<action>_backing_index``
    :raises TestbedMisconfig: If action is empty or is neither ``add`` nor ``remove``
    """
    if action and action in ('add', 'remove'):
        target = {'data_stream': data_stream, 'index': index}
        return {f'{action}_backing_index': target}
    raise TestbedMisconfig('action must be "add" or "remove"')

88 

def getlogger(name: str) -> logging.Logger:
    """
    Return a named logger

    :param name: The name of the logger to retrieve
    :returns: The logger returned by :py:func:`logging.getLogger` for ``name``
    :rtype: logging.Logger
    """
    return logging.getLogger(name)

92 

def get_routing(tier='hot'):
    """
    Return the routing allocation tier preference setting for a tier

    :param tier: Key to look up in ``TIER``
    :returns: A single-entry dictionary keyed by
        ``index.routing.allocation.include._tier_preference``. The value is the ``pref``
        entry for the tier, or ``data_content`` if the lookup raises a KeyError.
    """
    setting = 'index.routing.allocation.include._tier_preference'
    try:
        return {setting: TIER[tier]['pref']}
    except KeyError:
        # Fallback value
        return {setting: 'data_content'}

101 

def iso8601_now() -> str:
    """
    :returns: An ISO8601 timestamp based on now, in UTC, ending in ``Z``
    :rtype: str
    """
    # datetime.utcnow() is deprecated as of Python 3.12, so the current time is obtained as
    # a timezone-aware value by explicitly declaring timezone.utc.
    #
    ### Example:
    ###   datetime.now(timezone.utc).isoformat()
    ###   Result: 2024-04-16T16:00:00+00:00
    ### End Example
    #
    # Because the UTC timezone is affirmatively declared, the result always ends in +00:00.
    # Replace that offset with Z, the shorter Zulu notation for UTC (which Elasticsearch
    # uses).
    return datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')

131 

def mapping_component():
    """
    Return a mappings component template

    :returns: A dictionary with the single key ``mappings`` whose value is ``MAPPING``
    """
    return dict(mappings=MAPPING)

135 

def mounted_name(index, tier):
    """
    Return a value for renamed_index for mounting a searchable snapshot index

    :param index: The index name
    :param tier: Key to look up in ``TIER``
    :returns: The tier's ``prefix`` value and the index name, joined by a hyphen
    """
    prefix = TIER[tier]["prefix"]
    return f'{prefix}-{index}'

139 

def posmatch(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[int]:
    """
    Find where the values of original occur in compare.

    For each value in original, in order, every index position in compare holding an equal
    value is recorded. A position is reported once per matching value in original.

    :param original: The values to look for
    :param compare: The sequence to search
    :returns: A list of index positions from compare
    """
    logger = getlogger(__name__)
    positions = []
    for wanted in original:
        # All positions in compare holding a value equal to this one
        hits = [pos for pos, candidate in enumerate(compare) if candidate == wanted]
        for pos in hits:
            logger.debug('Value %s found in both original and compare', compare[pos])
        positions.extend(hits)
    return positions

153 

def randomstr(length: int = 16, lowercase: bool = False):
    """
    Generate a random string of letters and digits

    :param length: Number of characters to generate
    :param lowercase: Use lowercase ASCII letters if True, uppercase otherwise
    :returns: The random string
    """
    alphabet = string.ascii_lowercase if lowercase else string.ascii_uppercase
    pool = alphabet + string.digits
    return ''.join(random.choices(pool, k=length))

160 

def remove_by_index(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[str]:
    """
    By list index position, remove entries from compare which also exist in original

    Entries are deleted from compare in place, and compare is also returned.

    :param original: The values to remove
    :param compare: The list to remove matching entries from
    :returns: compare, with the matching entries deleted
    """
    logger = getlogger(__name__)
    positions = posmatch(original, compare)
    if not positions:
        logger.debug('No matching values to remove')
        return compare
    # posmatch reports a position once for every matching value in original, so a value
    # repeated in original yields the same position more than once. Collapse duplicates so
    # each entry is deleted exactly once.
    #
    # If you del list[0] first, you get a new list[0], so delete from the highest index
    # value to the lowest.
    for idx in sorted(set(positions), reverse=True):
        del compare[idx]
    return compare

174 

def setting_component(ilm_policy: str = None, rollover_alias: str = None):
    """
    Return a settings component template

    :param ilm_policy: If provided, stored as ``index.lifecycle.name``
    :param rollover_alias: If provided, stored as ``index.lifecycle.rollover_alias``
    :returns: A dictionary with the single key ``settings``. ``index.number_of_replicas``
        is always set to 0.
    """
    settings = {'index.number_of_replicas': 0}
    optional = (
        ('index.lifecycle.name', ilm_policy),
        ('index.lifecycle.rollover_alias', rollover_alias),
    )
    # Only settings that were actually given a value are included
    settings.update({key: value for key, value in optional if value})
    return {'settings': settings}

183 

def storage_type(tier):
    """
    Return the storage type of a searchable snapshot by tier

    :param tier: Key to look up in ``TIER``
    :returns: The ``storage`` value of the tier's entry in ``TIER``
    """
    tier_config = TIER[tier]
    return tier_config["storage"]

187 

def uniq_values(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[str]:
    """
    Return any values unique to list 'compare'

    :param original: The values to exclude
    :param compare: The list to filter. It is deep-copied first, so the caller's list is
        not modified.
    :returns: The copy of compare after ``remove_by_index`` has removed matching entries
    """
    logger = getlogger(__name__)
    # Work on a deep copy so the caller's list is left untouched
    scratch = deepcopy(compare)
    only_in_compare = remove_by_index(original, scratch)
    if only_in_compare:
        logger.debug('Values found only in compare: %s', only_in_compare)
    return only_in_compare