Coverage for src/es_testbed/helpers/utils.py: 80%
102 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-23 13:32 -0600
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-23 13:32 -0600
1"""Utility helper functions"""
2import typing as t
3import random
4import string
5import logging
6from copy import deepcopy
7from datetime import datetime, timezone
8from es_testbed.defaults import ilm_force_merge, ilm_phase, MAPPING, TIER
9from es_testbed.exceptions import TestbedMisconfig
def build_ilm_phase(tier, actions=None, repository=None):
    """
    Assemble the ILM phase definition for a single tier.

    The base phase comes from ``ilm_phase(tier)``. For the cold and frozen tiers a
    ``searchable_snapshot`` action pointing at ``repository`` is added, and any extra
    ``actions`` are merged into the phase's action map.

    :param tier: Name of the tier to build a phase for
    :param actions: Optional mapping of extra actions to merge into the phase
    :param repository: Snapshot repository name; required for cold and frozen
    :raises TestbedMisconfig: if ``tier`` is cold or frozen and no repository is given
    :returns: The phase definition produced by ``ilm_phase``, with the additions above
    """
    phase = ilm_phase(tier)
    needs_snapshot = tier in ['cold', 'frozen']
    # Guard first: a searchable snapshot tier is meaningless without a repository
    if needs_snapshot and not repository:
        msg = f'Unable to build ILM phase for {tier} tier. Value for repository not provided'
        raise TestbedMisconfig(msg)
    if needs_snapshot:
        phase[tier]['actions']['searchable_snapshot'] = {'snapshot_repository': repository}
    if actions:
        phase[tier]['actions'].update(actions)
    return phase
def build_ilm_policy(
    tiers: list = None,
    forcemerge: bool = False,
    max_num_segments: int = 1,
    repository: str = None,
):
    """
    Build a full ILM policy based on the provided tiers.

    One phase is built per tier with ``build_ilm_phase``. When ``forcemerge`` is
    requested, the forcemerge action is added to the ``hot`` phase.

    :param tiers: Tier names to build phases for. Defaults to ``['hot', 'delete']``
    :param forcemerge: Whether to add a forcemerge action to the hot phase
    :param max_num_segments: Passed through to ``ilm_force_merge``
    :param repository: Snapshot repository name; required for cold and frozen
    :raises TestbedMisconfig: if cold or frozen is requested without a repository, or
        if forcemerge is requested but no hot phase was built
    :returns: A dictionary with the built phases under the ``phases`` key
    """
    if not tiers:
        tiers = ['hot', 'delete']
    phases = {}
    if ('cold' in tiers or 'frozen' in tiers) and not repository:
        raise TestbedMisconfig('Cannot build cold or frozen phase without repository')
    for tier in tiers:
        phases.update(build_ilm_phase(tier, repository=repository))
    if forcemerge:
        # The forcemerge action is attached to the hot phase. Fail with a clear
        # configuration error instead of a bare KeyError when there is none.
        if 'hot' not in phases:
            raise TestbedMisconfig('Cannot add forcemerge to a policy without a hot phase')
        phases['hot']['actions'].update(ilm_force_merge(max_num_segments=max_num_segments))
    return {'phases': phases}
def doc_gen(count=10, start_at=0, match=True):
    """
    Yield ``count`` test documents, numbered consecutively from ``start_at``.

    :param count: How many documents to generate
    :param start_at: The number assigned to the first document
    :param match: If True, string fields are prefixed with their own field name and
        ``number`` is the document number. Otherwise each field gets a random string
        prefix (chosen once per generator) and ``number`` is a random integer.
    :returns: A generator of document dictionaries
    """
    # One prefix per field: the field name itself, or a random string when not matching
    prefixes = {
        field: field if match else randomstr()
        for field in ('message', 'nested', 'deep')
    }
    # count and start_at define the range of document numbers
    for num in range(start_at, start_at + count):
        doc = {'@timestamp': iso8601_now()}
        doc['message'] = f'{prefixes["message"]}{num}'  # message# or randomstr#
        doc['number'] = num if match else random.randint(1001, 32767)  # num or random int
        doc['nested'] = {'key': f'{prefixes["nested"]}{num}'}  # nested#
        doc['deep'] = {'l1': {'l2': {'l3': f'{prefixes["deep"]}{num}'}}}  # deep#
        yield doc
def ds_action_generator(data_stream: str, index: str, action: str = None):
    """
    Build a single add or remove backing index action for a data_stream.

    :param data_stream: Name of the data_stream the action applies to
    :param index: Name of the backing index to add or remove
    :param action: Either ``add`` or ``remove``
    :raises TestbedMisconfig: if ``action`` is empty or not one of the two allowed values
    :returns: A dictionary with the single key ``<action>_backing_index``
    """
    if action and action in ['add', 'remove']:
        target = {'data_stream': data_stream, 'index': index}
        return {f'{action}_backing_index': target}
    raise TestbedMisconfig('action must be "add" or "remove"')
def getlogger(name: str) -> logging.Logger:
    """
    Return a named logger.

    :param name: The logger name to look up
    :returns: The logger returned by ``logging.getLogger(name)``
    :rtype: logging.Logger
    """
    return logging.getLogger(name)
def get_routing(tier='hot'):
    """
    Return the routing allocation tier preference setting for ``tier``.

    :param tier: Tier name to look up in ``TIER``
    :returns: A single-key settings dictionary. The value is the tier's ``pref``
        entry, or ``data_content`` when the lookup raises ``KeyError``.
    """
    setting = 'index.routing.allocation.include._tier_preference'
    try:
        return {setting: TIER[tier]['pref']}
    except KeyError:
        # Fallback value
        return {setting: 'data_content'}
def iso8601_now() -> str:
    """
    :returns: An ISO8601 timestamp for the current UTC time, using the ``Z`` suffix
    :rtype: str
    """
    # A timezone-aware "now" in UTC renders with an explicit offset, e.g.
    #
    #   datetime.now(timezone.utc).isoformat()  ->  2024-04-16T16:00:00+00:00
    #
    # The trailing +00:00 is swapped for Z, the shorter Zulu notation for UTC (which
    # Elasticsearch uses). Because timezone.utc is requested explicitly the offset
    # should always be +00:00; the other branches are defensive handling for a value
    # with no offset at all, or with a different one.
    pieces = datetime.now(timezone.utc).isoformat().split('+')
    stamp = pieces[0]
    if len(pieces) > 1:
        offset = pieces[1]
        if offset != '00:00':
            return f'{stamp}+{offset}'  # Fallback publishes the +TZ, whatever that was
        return f'{stamp}Z'  # Replace the UTC offset with Z
    if stamp[-1] == 'Z':
        return stamp  # Already ends with a Z for Zulu/UTC time
    return f'{stamp}Z'  # No offset and no Z, so append one
def mapping_component():
    """
    Return a mappings component template.

    :returns: A dictionary holding ``MAPPING`` under the ``mappings`` key
    """
    component = {'mappings': MAPPING}
    return component
def mounted_name(index, tier):
    """
    Return a value for renamed_index for mounting a searchable snapshot index.

    :param index: The original index name
    :param tier: Tier name used to look up the ``prefix`` entry in ``TIER``
    :returns: The tier prefix and the index name joined with a hyphen
    """
    prefix = TIER[tier]["prefix"]
    return f'{prefix}-{index}'
def posmatch(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[int]:
    """
    Find where values from ``original`` occur in ``compare``.

    :param original: The values to look for
    :param compare: The sequence to search
    :returns: A list of index positions in ``compare`` whose value equals a value in
        ``original``. Positions are grouped in the order of ``original``.
    """
    log = getlogger(__name__)
    found = []
    for wanted in original:
        # Every position in compare holding this value
        hits = [pos for pos, candidate in enumerate(compare) if candidate == wanted]
        for pos in hits:
            log.debug('Value %s found in both original and compare', compare[pos])
        found.extend(hits)
    return found
def randomstr(length: int = 16, lowercase: bool = False):
    """
    Generate a random string of letters and digits.

    :param length: Number of characters to generate
    :param lowercase: Use lowercase ASCII letters instead of uppercase
    :returns: The random string
    """
    alphabet = string.ascii_lowercase if lowercase else string.ascii_uppercase
    picks = random.choices(alphabet + string.digits, k=length)
    return ''.join(picks)
def remove_by_index(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[str]:
    """
    By list index position, remove entries from compare which also exist in original.

    ``compare`` is modified in place and is also the return value, so it must support
    item deletion.

    :param original: The values to remove
    :param compare: The sequence to remove matching values from
    :returns: ``compare``, with every entry that matched a value in ``original`` removed
    """
    logger = getlogger(__name__)
    positions = posmatch(original, compare)
    if not positions:
        logger.debug('No matching values to remove')
        return compare
    # posmatch reports a position once per matching value in original, so a value
    # repeated in original yields the same position more than once. De-duplicate so
    # that each position is deleted exactly once.
    #
    # If you del list[0] first, you get a new list[0], so delete from the highest
    # index value to the lowest.
    for idx in sorted(set(positions), reverse=True):
        del compare[idx]
    return compare
def setting_component(ilm_policy: str = None, rollover_alias: str = None):
    """
    Return a settings component template.

    :param ilm_policy: If set, stored as ``index.lifecycle.name``
    :param rollover_alias: If set, stored as ``index.lifecycle.rollover_alias``
    :returns: A dictionary with the index settings under the ``settings`` key.
        ``index.number_of_replicas`` is always 0.
    """
    settings = {'index.number_of_replicas': 0}
    optional = (
        ('index.lifecycle.name', ilm_policy),
        ('index.lifecycle.rollover_alias', rollover_alias),
    )
    for key, value in optional:
        # Only settings that were actually provided are included
        if value:
            settings[key] = value
    return {'settings': settings}
def storage_type(tier):
    """
    Return the storage type of a searchable snapshot by tier.

    :param tier: Tier name to look up in ``TIER``
    :returns: The tier's ``storage`` entry
    """
    tier_settings = TIER[tier]
    return tier_settings["storage"]
def uniq_values(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[str]:
    """
    Return any values unique to list 'compare'.

    :param original: The values to exclude
    :param compare: The sequence to filter; the caller's object is not modified
    :returns: A copy of ``compare`` without the entries that also exist in ``original``
    """
    logger = getlogger(__name__)
    # remove_by_index deletes in place, so hand it a deep copy
    scratch = deepcopy(compare)
    uniq = remove_by_index(original, scratch)
    if not uniq:
        return uniq
    logger.debug('Values found only in compare: %s', uniq)
    return uniq