Coverage for src/es_testbed/helpers/utils.py: 80%

102 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-23 13:32 -0600

1"""Utility helper functions""" 

2import typing as t 

3import random 

4import string 

5import logging 

6from copy import deepcopy 

7from datetime import datetime, timezone 

8from es_testbed.defaults import ilm_force_merge, ilm_phase, MAPPING, TIER 

9from es_testbed.exceptions import TestbedMisconfig 

10 

def build_ilm_phase(tier, actions=None, repository=None):
    """
    Build the ILM policy phase for a single tier.

    :param tier: Name of the tier/phase to build.
    :param actions: Optional extra actions merged into the phase's actions.
    :param repository: Snapshot repository name. Needed for cold and frozen tiers.
    :returns: The phase definition produced by ``ilm_phase``, with any additions applied.
    :raises TestbedMisconfig: If tier is cold or frozen and no repository was provided.
    """
    phase = ilm_phase(tier)
    needs_snapshot = tier in ('cold', 'frozen')
    # Guard clause: cold and frozen phases cannot be built without a repository
    if needs_snapshot and not repository:
        msg = f'Unable to build ILM phase for {tier} tier. Value for repository not provided'
        raise TestbedMisconfig(msg)
    phase_actions = phase[tier]['actions']
    if needs_snapshot:
        phase_actions['searchable_snapshot'] = {'snapshot_repository': repository}
    if actions:
        phase_actions.update(actions)
    return phase

23 

def build_ilm_policy(
    tiers: list = None,
    forcemerge: bool = False,
    max_num_segments: int = 1,
    repository: str = None,
):
    """
    Build a full ILM policy based on the provided tiers.

    When forcemerge is requested, the forcemerge action is added to the hot phase.

    :param tiers: Names of the tiers to include. Defaults to ``['hot', 'delete']``.
    :param forcemerge: Whether to add a forcemerge action to the hot phase.
    :param max_num_segments: Passed through to ``ilm_force_merge``.
    :param repository: Snapshot repository name. Needed for cold and frozen tiers.
    :returns: A dictionary with the built phases under the ``phases`` key.
    :raises TestbedMisconfig: If a cold or frozen tier is requested without a repository,
        or if forcemerge is requested but no hot phase was built.
    """
    if not tiers:
        tiers = ['hot', 'delete']
    if ('cold' in tiers or 'frozen' in tiers) and not repository:
        raise TestbedMisconfig('Cannot build cold or frozen phase without repository')
    phases = {}
    for tier in tiers:
        phases.update(build_ilm_phase(tier, repository=repository))
    if forcemerge:
        # The forcemerge action is attached to the hot phase, so fail with a clear
        # message instead of a bare KeyError when that phase is absent.
        if 'hot' not in phases:
            raise TestbedMisconfig('Cannot add forcemerge action without a hot phase')
        phases['hot']['actions'].update(ilm_force_merge(max_num_segments=max_num_segments))
    return {'phases': phases}

44 

def doc_gen(count=10, start_at=0, match=True):
    """
    Yield ``count`` generated documents, numbered from ``start_at``.

    :param count: How many documents to generate.
    :param start_at: The first number used in the generated values.
    :param match: If True, string values are the field name plus the number and
        ``number`` is the number itself. Otherwise a random string prefix (one per
        field) and a random integer are used.
    """
    # One prefix per field: the field name itself, or a random string
    prefixes = {
        field: field if match else randomstr()
        for field in ('message', 'nested', 'deep')
    }
    stop = start_at + count
    for num in range(start_at, stop):
        timestamp = iso8601_now()
        message = f'{prefixes["message"]}{num}'  # message# or randomstr#
        number = num if match else random.randint(1001, 32767)  # num or random int
        nested = {'key': f'{prefixes["nested"]}{num}'}  # nested#
        deep = {'l1': {'l2': {'l3': f'{prefixes["deep"]}{num}'}}}  # deep#
        yield {
            '@timestamp': timestamp,
            'message': message,
            'number': number,
            'nested': nested,
            'deep': deep,
        }

77 

def ds_action_generator(data_stream: str, index: str, action: str=None):
    """
    Build one backing-index action (add or remove) for a data_stream.

    :param data_stream: The data_stream name placed in the action.
    :param index: The index name placed in the action.
    :param action: Either ``add`` or ``remove``.
    :returns: A dictionary keyed by ``<action>_backing_index``.
    :raises TestbedMisconfig: If action is empty or not one of the two allowed values.
    """
    is_valid = action and action in ['add', 'remove']
    if not is_valid:
        raise TestbedMisconfig('action must be "add" or "remove"')
    body = {'data_stream': data_stream, 'index': index}
    return {f'{action}_backing_index': body}

88 

def getlogger(name: str) -> logging.Logger:
    """
    Return a named logger

    :param name: The logger name, passed to ``logging.getLogger``.
    :returns: The logger registered under that name.
    """
    return logging.getLogger(name)

92 

def get_routing(tier='hot'):
    """
    Build the routing allocation tier preference setting for a tier.

    :param tier: The tier name to look up in ``TIER``.
    :returns: A one-entry dictionary holding the tier preference. The value falls
        back to ``data_content`` when the lookup raises ``KeyError``.
    """
    setting = 'index.routing.allocation.include._tier_preference'
    try:
        return {setting: TIER[tier]['pref']}
    except KeyError:
        # Fallback value
        return {setting: 'data_content'}

101 

def iso8601_now() -> str:
    """
    :returns: An ISO8601 timestamp based on now, in UTC, ending in ``Z``
    :rtype: str
    """
    # datetime.utcnow() is deprecated as of Python 3.12 in favor of timezone-aware
    # objects, so the timestamp is built from datetime.now(timezone.utc).
    #
    ### Example:
    ###   datetime.now(timezone.utc).isoformat()
    ###   Result: 2024-04-16T16:00:00+00:00
    ### End Example
    #
    # Because the UTC timezone is declared explicitly, isoformat() always appends a
    # +00:00 offset. Swap it for Z, the shorter Zulu notation for UTC.
    stamp = datetime.now(timezone.utc).isoformat()
    return stamp.replace('+00:00', 'Z')

131 

def mapping_component():
    """
    Build a mappings component template.

    :returns: A dictionary holding ``MAPPING`` under the ``mappings`` key.
    """
    component = dict(mappings=MAPPING)
    return component

135 

def mounted_name(index, tier):
    """
    Build the renamed_index value used when mounting a searchable snapshot index.

    :param index: The index name.
    :param tier: The tier whose ``prefix`` is read from ``TIER``.
    :returns: The tier prefix and the index name joined by a hyphen.
    """
    prefix = TIER[tier]['prefix']
    return f'{prefix}-{index}'

139 

def posmatch(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[int]:
    """
    Find where values from original appear in compare.

    :param original: The values to look for.
    :param compare: The sequence that is searched.
    :returns: Index positions in compare of every matching value, grouped in the
        order the values appear in original.
    """
    logger = getlogger(__name__)
    positions = []
    for wanted in original:
        # Collect every spot in compare holding this value
        hits = [(pos, value) for pos, value in enumerate(compare) if value == wanted]
        for pos, value in hits:
            logger.debug('Value %s found in both original and compare', value)
            positions.append(pos)
    return positions

153 

def randomstr(length: int=16, lowercase: bool=False):
    """
    Generate a random string of letters and digits.

    :param length: How many characters to generate.
    :param lowercase: Use lowercase letters instead of uppercase ones.
    :returns: The generated string.
    """
    letters = string.ascii_lowercase if lowercase else string.ascii_uppercase
    alphabet = letters + string.digits
    picked = random.choices(alphabet, k=length)
    return str(''.join(picked))

160 

def remove_by_index(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[str]:
    """
    By list index position, remove entries from compare which also exist in original

    :param original: The values to remove.
    :param compare: The list to remove them from. It is modified in place.
    :returns: The same compare object, with the matching entries deleted.
    """
    logger = getlogger(__name__)
    positions = posmatch(original, compare)
    if not positions:
        logger.debug('No matching values to remove')
        return compare
    # posmatch reports a position once per matching value in original, so a value
    # repeated in original yields the same position more than once. Deduplicate,
    # otherwise the second delete at that position removes a non-matching entry.
    #
    # If you del list[0] first, you get a new list[0]
    # We need to delete from the highest index value to the lowest
    for idx in sorted(set(positions), reverse=True):
        del compare[idx]  # Delete the value at position idx
    return compare

174 

def setting_component(ilm_policy: str=None, rollover_alias: str=None):
    """
    Build a settings component template.

    :param ilm_policy: Optional value for ``index.lifecycle.name``.
    :param rollover_alias: Optional value for ``index.lifecycle.rollover_alias``.
    :returns: A dictionary holding the index settings under the ``settings`` key.
    """
    settings = {'index.number_of_replicas': 0}
    optional = (
        ('index.lifecycle.name', ilm_policy),
        ('index.lifecycle.rollover_alias', rollover_alias),
    )
    # Only add the settings for which a value was provided
    for key, value in optional:
        if value:
            settings[key] = value
    return {'settings': settings}

183 

def storage_type(tier):
    """
    Look up the storage type of a searchable snapshot by tier.

    :param tier: The tier whose ``storage`` value is read from ``TIER``.
    :returns: The ``storage`` value for that tier.
    """
    tier_config = TIER[tier]
    return tier_config['storage']

187 

def uniq_values(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[str]:
    """
    Find the values that appear only in 'compare'.

    :param original: The values to exclude.
    :param compare: The values to filter. This argument is left unmodified.
    :returns: A copy of compare without the entries that also exist in original.
    """
    logger = getlogger(__name__)
    # Work on a deep copy so the caller's list is not changed
    working_copy = deepcopy(compare)
    uniq = remove_by_index(original, working_copy)
    if not uniq:
        return uniq
    logger.debug('Values found only in compare: %s', uniq)
    return uniq