Coverage for /Users/buh/.pyenv/versions/3.12.2/envs/es-testbed/lib/python3.12/site-packages/es_testbed/helpers/utils.py: 86%
102 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-24 22:41 -0600
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-24 22:41 -0600
1"""Utility helper functions"""
2import typing as t
3import random
4import string
5import logging
6from copy import deepcopy
7from datetime import datetime, timezone
8from es_testbed.defaults import ilm_force_merge, ilm_phase, MAPPING, TIER
9from es_testbed.exceptions import TestbedMisconfig
def build_ilm_phase(tier, actions=None, repository=None):
    """
    Build a single ILM policy phase for the given tier.

    :param tier: Name of the tier. It is passed to ``ilm_phase`` to obtain the
        phase skeleton and is also the key under which the phase is stored.
    :param actions: Optional mapping of additional actions. When provided it is
        merged into the phase's ``actions`` last, so it can override defaults.
    :param repository: Snapshot repository name. Required when ``tier`` is
        ``cold`` or ``frozen``.
    :raises TestbedMisconfig: If ``tier`` is ``cold`` or ``frozen`` and no
        repository value was provided.
    :returns: The phase produced by ``ilm_phase`` with the additions applied.
    """
    phase = ilm_phase(tier)
    needs_snapshot = tier in ('cold', 'frozen')
    # Cold and frozen phases mount a searchable snapshot, which cannot be
    # configured without a repository to read from.
    if needs_snapshot and not repository:
        msg = f'Unable to build ILM phase for {tier} tier. Value for repository not provided'
        raise TestbedMisconfig(msg)
    if needs_snapshot:
        phase[tier]['actions']['searchable_snapshot'] = {'snapshot_repository': repository}
    if actions:
        phase[tier]['actions'].update(actions)
    return phase
def build_ilm_policy(
    tiers: list = None,
    forcemerge: bool = False,
    max_num_segments: int = 1,
    repository: str = None,
):
    """
    Build a full ILM policy based on the provided tiers.

    One phase is built per entry in ``tiers`` by calling ``build_ilm_phase``.

    :param tiers: Tier names to build phases for. When empty or ``None`` the
        policy defaults to ``['hot', 'delete']``.
    :param forcemerge: When true, the action returned by ``ilm_force_merge`` is
        merged into the actions of the ``hot`` phase. NOTE: the action is
        always placed in ``hot``, regardless of which other tiers are present,
        so ``hot`` must be one of ``tiers`` or a ``KeyError`` is raised.
    :param max_num_segments: Passed through to ``ilm_force_merge``.
    :param repository: Snapshot repository name, required whenever ``cold`` or
        ``frozen`` is among ``tiers``.
    :raises TestbedMisconfig: If ``cold`` or ``frozen`` is requested without a
        repository.
    :returns: A dictionary of the form ``{'phases': {...}}``.
    """
    if not tiers:
        tiers = ['hot', 'delete']
    # Fail fast before building anything: cold/frozen phases need a repository
    if ('cold' in tiers or 'frozen' in tiers) and not repository:
        raise TestbedMisconfig('Cannot build cold or frozen phase without repository')
    phases = {}
    for tier in tiers:
        phases.update(build_ilm_phase(tier, repository=repository))
    if forcemerge:
        phases['hot']['actions'].update(ilm_force_merge(max_num_segments=max_num_segments))
    return {'phases': phases}
def doc_gen(count=10, start_at=0, match=True):
    """
    Yield ``count`` test documents, numbered from ``start_at``.

    :param count: How many documents to generate.
    :param start_at: The number used for the first document; each following
        document increments it by one.
    :param match: When true, the string fields are prefixed with their own key
        name (``message``, ``nested``, ``deep``) and ``number`` equals the
        document number. When false, each prefix is a random string from
        ``randomstr()`` (chosen once and shared by every document) and
        ``number`` is a random integer between 1001 and 32767.
    :returns: A generator of document dictionaries.
    """
    keys = ('message', 'nested', 'deep')
    # One prefix per key: the key itself when matching, otherwise a random string
    prefixes = {key: key if match else randomstr() for key in keys}

    # This is where count and start_at matter
    for num in range(start_at, start_at + count):
        yield {
            '@timestamp': iso8601_now(),
            'message': f'{prefixes["message"]}{num}',  # message# or randomstr#
            'number': num if match else random.randint(1001, 32767),  # num or random int
            'nested': {
                'key': f'{prefixes["nested"]}{num}'  # nested#
            },
            'deep': {
                'l1': {
                    'l2': {
                        'l3': f'{prefixes["deep"]}{num}'  # deep#
                    }
                }
            }
        }
def ds_action_generator(data_stream: str, index: str, action: str = None):
    """
    Generate a single add or remove backing index action for a data_stream.

    :param data_stream: Value stored under ``data_stream`` in the action body.
    :param index: Value stored under ``index`` in the action body.
    :param action: Either ``add`` or ``remove``; selects the
        ``add_backing_index`` or ``remove_backing_index`` key.
    :raises TestbedMisconfig: If ``action`` is missing or is anything other
        than ``add`` or ``remove``.
    :returns: ``{'<action>_backing_index': {'data_stream': ..., 'index': ...}}``
    """
    # None and empty values fail the membership test as well, so a single
    # check covers both "not provided" and "not a valid action".
    if action not in ('add', 'remove'):
        raise TestbedMisconfig('action must be "add" or "remove"')
    body = {'data_stream': data_stream, 'index': index}
    return {f'{action}_backing_index': body}
def getlogger(name: str) -> logging.Logger:
    """
    Return a named logger.

    :param name: The logger name, passed directly to ``logging.getLogger``.
    :returns: The ``logging.Logger`` instance registered under ``name``.
    """
    return logging.getLogger(name)
def get_routing(tier='hot'):
    """
    Return the routing allocation tier preference setting for a tier.

    :param tier: Key looked up in ``TIER``; its ``pref`` value is used.
    :returns: A single-entry dictionary keyed by
        ``index.routing.allocation.include._tier_preference``. If the lookup
        raises ``KeyError`` the value falls back to ``data_content``.
    """
    setting = 'index.routing.allocation.include._tier_preference'
    try:
        return {setting: TIER[tier]['pref']}
    except KeyError:
        # Fallback value
        return {setting: 'data_content'}
def iso8601_now() -> str:
    """
    :returns: An ISO8601 timestamp based on now
    :rtype: str
    """
    # The timestamp is built from a timezone-aware UTC datetime:
    #
    ### Example:
    ###   datetime.now(timezone.utc).isoformat()
    ###   Result: 2024-04-16T16:00:00+00:00
    ### End Example
    #
    # Because the UTC timezone is declared explicitly, isoformat() appends +00:00.
    # That offset is replaced here with Z, the shorter Zulu notation for UTC
    # (which Elasticsearch uses).
    #
    # The timezone is forced to timezone.utc, so the offset should ALWAYS be
    # +00:00. The other branches exist only as a safety net in case the value
    # ever shows up without an offset, or with a different one.
    stamp = datetime.now(timezone.utc).isoformat()
    base, plus, offset = stamp.partition('+')
    if not plus:
        # No +offset at all: keep an existing trailing Z, otherwise add one
        return base if base.endswith('Z') else f'{base}Z'
    if offset == '00:00':
        return f'{base}Z'  # Swap the +00:00 offset for Z
    return f'{base}+{offset}'  # Fallback publishes the +TZ, whatever that was
def mapping_component():
    """
    Return a mappings component template.

    :returns: A dictionary with ``MAPPING`` stored under the ``mappings`` key.
    """
    component = {'mappings': MAPPING}
    return component
def mounted_name(index, tier):
    """
    Return a value for renamed_index for mounting a searchable snapshot index.

    :param index: The index name to be prefixed.
    :param tier: Key looked up in ``TIER``; its ``prefix`` value is prepended.
    :returns: ``<prefix>-<index>``
    """
    prefix = TIER[tier]["prefix"]
    return f'{prefix}-{index}'
def posmatch(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[int]:
    """
    Find where values from ``original`` occur in ``compare``.

    :param original: The values to look for.
    :param compare: The sequence that is searched.
    :returns: A list of index positions in ``compare`` whose value equals a
        value in ``original``. Positions are collected per value of
        ``original``, in order, so a value repeated in ``original`` contributes
        its positions once for each repetition.
    """
    logger = getlogger(__name__)
    found = []
    for wanted in original:
        # Every position in compare holding the value currently being sought
        hits = [pos for pos, value in enumerate(compare) if value == wanted]
        for pos in hits:
            logger.debug('Value %s found in both original and compare', compare[pos])
            found.append(pos)
    return found
def randomstr(length: int = 16, lowercase: bool = False):
    """
    Generate a random string of letters and digits.

    :param length: Number of characters in the result.
    :param lowercase: Draw letters from the lowercase ASCII alphabet when true,
        otherwise from the uppercase ASCII alphabet. Digits are always included.
    :returns: The random string.
    """
    letters = string.ascii_lowercase if lowercase else string.ascii_uppercase
    alphabet = letters + string.digits
    return ''.join(random.choices(alphabet, k=length))
def remove_by_index(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[str]:
    """
    By list index position, remove entries from compare which also exist in original.

    :param original: The values that should be removed from ``compare``.
    :param compare: The list to prune. It is modified IN PLACE; pass a copy if
        the caller's list must stay untouched.
    :returns: The same ``compare`` object, with every matching entry deleted.
    """
    logger = getlogger(__name__)
    positions = posmatch(original, compare)
    if not positions:
        logger.debug('No matching values to remove')
        return compare
    # posmatch reports a position once for every time its value appears in
    # original, so repeated values in original produce repeated positions.
    # Deleting the same position twice would remove an unrelated entry (or run
    # off the end of the list), so each position is deleted only once.
    #
    # If you del list[0] first, you get a new list[0], so we need to delete
    # from the highest index value to the lowest.
    for idx in sorted(set(positions), reverse=True):
        del compare[idx]  # Delete the value at position idx
    return compare
def setting_component(ilm_policy: str = None, rollover_alias: str = None):
    """
    Return a settings component template.

    :param ilm_policy: When provided, stored as ``index.lifecycle.name``.
    :param rollover_alias: When provided, stored as
        ``index.lifecycle.rollover_alias``.
    :returns: ``{'settings': {...}}`` where the settings always contain
        ``index.number_of_replicas`` set to 0.
    """
    settings = {'index.number_of_replicas': 0}
    if ilm_policy:
        settings['index.lifecycle.name'] = ilm_policy
    if rollover_alias:
        settings['index.lifecycle.rollover_alias'] = rollover_alias
    return {'settings': settings}
def storage_type(tier):
    """
    Return the storage type of a searchable snapshot by tier.

    :param tier: Key looked up in ``TIER``.
    :returns: The ``storage`` value of that tier's entry.
    """
    tier_config = TIER[tier]
    return tier_config["storage"]
def uniq_values(original: t.Sequence[str], compare: t.Sequence[str]) -> t.Sequence[str]:
    """
    Return any values unique to list 'compare'.

    :param original: Values to exclude from the result.
    :param compare: The list whose unique values are wanted. It is not
        modified; the pruning is done on a deep copy.
    :returns: The entries of ``compare`` left after ``remove_by_index`` has
        removed those also found in ``original``.
    """
    logger = getlogger(__name__)
    # remove_by_index deletes entries in place, so hand it a deep copy to keep
    # the caller's list unchanged
    working_copy = deepcopy(compare)
    uniq = remove_by_index(original, working_copy)
    if uniq:
        logger.debug('Values found only in compare: %s', uniq)
    return uniq