muutils.misc.sequence
from __future__ import annotations

from typing import (
    Iterable,
    Any,
    Generator,
    Callable,
    Union,
)

import typing
from typing import (
    Literal,
    Mapping,
)


# Policy names accepted by `apply_mapping` / `apply_mapping_chain` for items
# that are not present in the mapping.
WhenMissing = Literal["except", "skip", "include"]


def empty_sequence_if_attr_false(
    itr: Iterable[Any],
    attr_owner: Any,
    attr_name: str,
) -> Iterable[Any]:
    """Return `itr` if `attr_owner` has a truthy attribute `attr_name`, otherwise an empty tuple.

    Particularly useful for optionally inserting delimiters into a sequence depending on a `TokenizerElement` attribute.

    # Parameters:
    - `itr: Iterable[Any]`
        The iterable to return if the attribute is truthy.
    - `attr_owner: Any`
        The object to check for the attribute.
    - `attr_name: str`
        The name of the attribute to check.

    # Returns:
    - `itr` itself (not a copy) if `attr_owner` has the attribute `attr_name` and it boolean casts to `True`.
    - `()` if the attribute is falsy or not present.
    """
    # a missing attribute defaults to `False`, i.e. it is treated like a falsy one
    if getattr(attr_owner, attr_name, False):
        return itr
    return ()


def flatten(it: Iterable[Any], levels_to_flatten: int | None = None) -> Generator:
    """
    Flattens an arbitrarily nested iterable.
    Flattens all iterable data types except for `str` and `bytes`.

    # Returns
    Generator over the flattened sequence.

    # Parameters
    - `it`: Any arbitrarily nested iterable.
    - `levels_to_flatten`: Number of levels to flatten by, starting at the outermost layer.
        If `None`, performs full flattening. If `0` or negative, the items of `it` are yielded unchanged.
    """
    for element in it:
        # TODO: swap type check with more general check for __iter__() or __next__() or whatever
        # `str`/`bytes` are iterable but are treated as atoms
        is_nested: bool = hasattr(element, "__iter__") and not isinstance(
            element, (str, bytes)
        )
        if not is_nested:
            yield element
        elif levels_to_flatten is None:
            # unlimited depth: keep recursing with no level budget
            yield from flatten(element, None)
        elif levels_to_flatten > 0:
            # spend one level of the budget on this nested iterable
            yield from flatten(element, levels_to_flatten - 1)
        else:
            # level budget exhausted: emit the nested iterable as-is
            yield element


# string-like operations on lists
# --------------------------------------------------------------------------------


def list_split(lst: list, val: Any) -> list[list]:
    """split a list into sublists by `val`. similar to "a_b_c".split("_")

    Elements equal to `val` (compared with `==`) act as separators and are dropped.

    ```python
    >>> list_split([1,2,3,0,4,5,0,6], 0)
    [[1, 2, 3], [4, 5], [6]]
    >>> list_split([0,1,2,3], 0)
    [[], [1, 2, 3]]
    >>> list_split([1,2,3], 0)
    [[1, 2, 3]]
    >>> list_split([], 0)
    [[]]
    ```

    """
    chunks: list[list] = [[]]
    if len(lst) == 0:
        return chunks

    current: list = chunks[0]
    for element in lst:
        if element == val:
            # separator found: start a fresh sublist
            current = []
            chunks.append(current)
        else:
            current.append(element)
    return chunks


def list_join(lst: list, factory: Callable) -> list:
    """add a *new* instance of `factory()` between each element of `lst`

    `factory` is called once per gap, so every separator is a distinct call result.

    ```python
    >>> list_join([1,2,3], lambda : 0)
    [1, 0, 2, 0, 3]
    >>> list_join([1,2,3], lambda: [time.sleep(0.1), time.time()][1])
    [1, 1600000000.0, 2, 1600000000.1, 3]
    ```

    (the timestamps in the second example are illustrative)
    """
    if len(lst) == 0:
        return []

    joined: list = [lst[0]]
    for element in lst[1:]:
        # separator first, then the element it precedes
        joined.extend((factory(), element))
    return joined


# applying mappings
# --------------------------------------------------------------------------------

_AM_K = typing.TypeVar("_AM_K")
_AM_V = typing.TypeVar("_AM_V")


def apply_mapping(
    mapping: Mapping[_AM_K, _AM_V],
    iter: Iterable[_AM_K],
    when_missing: WhenMissing = "skip",
) -> list[Union[_AM_K, _AM_V]]:
    """Given an iterable and a mapping, apply the mapping to the iterable with certain options

    Gotcha: if `when_missing` is invalid, this is totally fine until a missing key is actually encountered.

    Note: you can use this with `muutils.kappa.Kappa` if you want to pass a function instead of a dict

    # Parameters:
    - `mapping : Mapping[_AM_K, _AM_V]`
        must have `__contains__` and `__getitem__`, both of which take `_AM_K` and the latter returns `_AM_V`
    - `iter : Iterable[_AM_K]`
        the iterable to apply the mapping to
    - `when_missing : WhenMissing`
        what to do when a key is missing from the mapping -- this is what distinguishes this function from `map`
        you can choose from `"skip"`, `"include"` (without converting), and `"except"`
        (defaults to `"skip"`)

    # Returns:
    return type is one of:
    - `list[_AM_V]` if `when_missing` is `"skip"` or `"except"`
    - `list[Union[_AM_K, _AM_V]]` if `when_missing` is `"include"`

    # Raises:
    - `KeyError` : if the item is missing from the mapping and `when_missing` is `"except"`
    - `ValueError` : if `when_missing` is invalid
    """
    output: list[Union[_AM_K, _AM_V]] = []
    for item in iter:
        if item in mapping:
            output.append(mapping[item])
        elif when_missing == "skip":
            continue
        elif when_missing == "include":
            # keep the unmapped item itself, unconverted
            output.append(item)
        elif when_missing == "except":
            raise KeyError(f"item {item} is missing from mapping {mapping}")
        else:
            # only reachable once a missing key meets an unknown policy
            raise ValueError(
                f"invalid value for {when_missing = }\n{item = }\n{mapping = }"
            )
    return output


def apply_mapping_chain(
    mapping: Mapping[_AM_K, Iterable[_AM_V]],
    iter: Iterable[_AM_K],
    when_missing: WhenMissing = "skip",
) -> list[Union[_AM_K, _AM_V]]:
    """Given an iterable and a mapping, chain the mappings together

    Each item found in `mapping` contributes *all* elements of `mapping[item]` to the output, in order.

    Gotcha: if `when_missing` is invalid, this is totally fine until a missing key is actually encountered.

    Note: you can use this with `muutils.kappa.Kappa` if you want to pass a function instead of a dict

    # Parameters:
    - `mapping : Mapping[_AM_K, Iterable[_AM_V]]`
        must have `__contains__` and `__getitem__`, both of which take `_AM_K` and the latter returns `Iterable[_AM_V]`
    - `iter : Iterable[_AM_K]`
        the iterable to apply the mapping to
    - `when_missing : WhenMissing`
        what to do when a key is missing from the mapping -- this is what distinguishes this function from `map`
        you can choose from `"skip"`, `"include"` (without converting), and `"except"`
        (defaults to `"skip"`)

    # Returns:
    return type is one of:
    - `list[_AM_V]` if `when_missing` is `"skip"` or `"except"`
    - `list[Union[_AM_K, _AM_V]]` if `when_missing` is `"include"`

    # Raises:
    - `KeyError` : if the item is missing from the mapping and `when_missing` is `"except"`
    - `ValueError` : if `when_missing` is invalid

    """
    output: list[Union[_AM_K, _AM_V]] = []
    for item in iter:
        if item in mapping:
            # splice in every element the item maps to
            output.extend(mapping[item])
        elif when_missing == "skip":
            continue
        elif when_missing == "include":
            # keep the unmapped item itself, unconverted
            output.append(item)
        elif when_missing == "except":
            raise KeyError(f"item {item} is missing from mapping {mapping}")
        else:
            # only reachable once a missing key meets an unknown policy
            raise ValueError(
                f"invalid value for {when_missing = }\n{item = }\n{mapping = }"
            )
    return output
def empty_sequence_if_attr_false(
    itr: Iterable[Any],
    attr_owner: Any,
    attr_name: str,
) -> Iterable[Any]:
    """Return `itr` if `attr_owner` has a truthy attribute `attr_name`, otherwise an empty tuple.

    Particularly useful for optionally inserting delimiters into a sequence depending on a `TokenizerElement` attribute.

    # Parameters:
    - `itr: Iterable[Any]`
        The iterable to return if the attribute is truthy.
    - `attr_owner: Any`
        The object to check for the attribute.
    - `attr_name: str`
        The name of the attribute to check.

    # Returns:
    - `itr` itself (not a copy) if `attr_owner` has the attribute `attr_name` and it boolean casts to `True`.
    - `()` if the attribute is falsy or not present.
    """
    # a missing attribute defaults to `False`, i.e. it is treated like a falsy one
    if getattr(attr_owner, attr_name, False):
        return itr
    return ()
Returns `itr` if `attr_owner` has the attribute `attr_name` and it boolean casts to `True`. Returns an empty sequence otherwise.

Particularly useful for optionally inserting delimiters into a sequence depending on a `TokenizerElement` attribute.
Parameters:
- `itr: Iterable[Any]` The iterable to return if the attribute is `True`.
- `attr_owner: Any` The object to check for the attribute.
- `attr_name: str` The name of the attribute to check.
Returns:
- `itr: Iterable` if `attr_owner` has the attribute `attr_name` and it boolean casts to `True`, otherwise an empty sequence.
- `()` an empty sequence if the attribute is `False` or not present.
def flatten(it: Iterable[Any], levels_to_flatten: int | None = None) -> Generator:
    """
    Flattens an arbitrarily nested iterable.
    Flattens all iterable data types except for `str` and `bytes`.

    # Returns
    Generator over the flattened sequence.

    # Parameters
    - `it`: Any arbitrarily nested iterable.
    - `levels_to_flatten`: Number of levels to flatten by, starting at the outermost layer.
        If `None`, performs full flattening. If `0` or negative, the items of `it` are yielded unchanged.
    """
    for element in it:
        # TODO: swap type check with more general check for __iter__() or __next__() or whatever
        # `str`/`bytes` are iterable but are treated as atoms
        is_nested: bool = hasattr(element, "__iter__") and not isinstance(
            element, (str, bytes)
        )
        if not is_nested:
            yield element
        elif levels_to_flatten is None:
            # unlimited depth: keep recursing with no level budget
            yield from flatten(element, None)
        elif levels_to_flatten > 0:
            # spend one level of the budget on this nested iterable
            yield from flatten(element, levels_to_flatten - 1)
        else:
            # level budget exhausted: emit the nested iterable as-is
            yield element
Flattens an arbitrarily nested iterable.
Flattens all iterable data types except for `str` and `bytes`.
Returns
Generator over the flattened sequence.
Parameters
- `it`: Any arbitrarily nested iterable.
- `levels_to_flatten`: Number of levels to flatten by, starting at the outermost layer. If `None`, performs full flattening.
def list_split(lst: list, val: Any) -> list[list]:
    """split a list into sublists by `val`. similar to "a_b_c".split("_")

    Elements equal to `val` (compared with `==`) act as separators and are dropped.

    ```python
    >>> list_split([1,2,3,0,4,5,0,6], 0)
    [[1, 2, 3], [4, 5], [6]]
    >>> list_split([0,1,2,3], 0)
    [[], [1, 2, 3]]
    >>> list_split([1,2,3], 0)
    [[1, 2, 3]]
    >>> list_split([], 0)
    [[]]
    ```

    """
    chunks: list[list] = [[]]
    if len(lst) == 0:
        return chunks

    current: list = chunks[0]
    for element in lst:
        if element == val:
            # separator found: start a fresh sublist
            current = []
            chunks.append(current)
        else:
            current.append(element)
    return chunks
Split a list into sublists by `val`. Similar to `"a_b_c".split("_")`.
>>> list_split([1,2,3,0,4,5,0,6], 0)
[[1, 2, 3], [4, 5], [6]]
>>> list_split([0,1,2,3], 0)
[[], [1, 2, 3]]
>>> list_split([1,2,3], 0)
[[1, 2, 3]]
>>> list_split([], 0)
[[]]
def list_join(lst: list, factory: Callable) -> list:
    """add a *new* instance of `factory()` between each element of `lst`

    `factory` is called once per gap, so every separator is a distinct call result.

    ```python
    >>> list_join([1,2,3], lambda : 0)
    [1, 0, 2, 0, 3]
    >>> list_join([1,2,3], lambda: [time.sleep(0.1), time.time()][1])
    [1, 1600000000.0, 2, 1600000000.1, 3]
    ```

    (the timestamps in the second example are illustrative)
    """
    if len(lst) == 0:
        return []

    joined: list = [lst[0]]
    for element in lst[1:]:
        # separator first, then the element it precedes
        joined.extend((factory(), element))
    return joined
Add a *new* instance of `factory()` between each element of `lst`.
>>> list_join([1,2,3], lambda : 0)
[1, 0, 2, 0, 3]
>>> list_join([1,2,3], lambda: [time.sleep(0.1), time.time()][1])
[1, 1600000000.0, 2, 1600000000.1, 3]
def apply_mapping(
    mapping: Mapping[_AM_K, _AM_V],
    iter: Iterable[_AM_K],
    when_missing: WhenMissing = "skip",
) -> list[Union[_AM_K, _AM_V]]:
    """Given an iterable and a mapping, apply the mapping to the iterable with certain options

    Gotcha: if `when_missing` is invalid, this is totally fine until a missing key is actually encountered.

    Note: you can use this with `muutils.kappa.Kappa` if you want to pass a function instead of a dict

    # Parameters:
    - `mapping : Mapping[_AM_K, _AM_V]`
        must have `__contains__` and `__getitem__`, both of which take `_AM_K` and the latter returns `_AM_V`
    - `iter : Iterable[_AM_K]`
        the iterable to apply the mapping to
    - `when_missing : WhenMissing`
        what to do when a key is missing from the mapping -- this is what distinguishes this function from `map`
        you can choose from `"skip"`, `"include"` (without converting), and `"except"`
        (defaults to `"skip"`)

    # Returns:
    return type is one of:
    - `list[_AM_V]` if `when_missing` is `"skip"` or `"except"`
    - `list[Union[_AM_K, _AM_V]]` if `when_missing` is `"include"`

    # Raises:
    - `KeyError` : if the item is missing from the mapping and `when_missing` is `"except"`
    - `ValueError` : if `when_missing` is invalid
    """
    output: list[Union[_AM_K, _AM_V]] = []
    for item in iter:
        if item in mapping:
            output.append(mapping[item])
        elif when_missing == "skip":
            continue
        elif when_missing == "include":
            # keep the unmapped item itself, unconverted
            output.append(item)
        elif when_missing == "except":
            raise KeyError(f"item {item} is missing from mapping {mapping}")
        else:
            # only reachable once a missing key meets an unknown policy
            raise ValueError(
                f"invalid value for {when_missing = }\n{item = }\n{mapping = }"
            )
    return output
Given an iterable and a mapping, apply the mapping to the iterable with certain options.

Gotcha: if `when_missing` is invalid, this is totally fine until a missing key is actually encountered.

Note: you can use this with `muutils.kappa.Kappa` if you want to pass a function instead of a dict.
Parameters:
- `mapping : Mapping[_AM_K, _AM_V]` must have `__contains__` and `__getitem__`, both of which take `_AM_K` and the latter returns `_AM_V`
- `iter : Iterable[_AM_K]` the iterable to apply the mapping to
- `when_missing : WhenMissing` what to do when a key is missing from the mapping -- this is what distinguishes this function from `map`. You can choose from `"skip"`, `"include"` (without converting), and `"except"` (defaults to `"skip"`)
Returns:
return type is one of:
- `list[_AM_V]` if `when_missing` is `"skip"` or `"except"`
- `list[Union[_AM_K, _AM_V]]` if `when_missing` is `"include"`
Raises:
- `KeyError`: if the item is missing from the mapping and `when_missing` is `"except"`
- `ValueError`: if `when_missing` is invalid
def apply_mapping_chain(
    mapping: Mapping[_AM_K, Iterable[_AM_V]],
    iter: Iterable[_AM_K],
    when_missing: WhenMissing = "skip",
) -> list[Union[_AM_K, _AM_V]]:
    """Given an iterable and a mapping, chain the mappings together

    Each item found in `mapping` contributes *all* elements of `mapping[item]` to the output, in order.

    Gotcha: if `when_missing` is invalid, this is totally fine until a missing key is actually encountered.

    Note: you can use this with `muutils.kappa.Kappa` if you want to pass a function instead of a dict

    # Parameters:
    - `mapping : Mapping[_AM_K, Iterable[_AM_V]]`
        must have `__contains__` and `__getitem__`, both of which take `_AM_K` and the latter returns `Iterable[_AM_V]`
    - `iter : Iterable[_AM_K]`
        the iterable to apply the mapping to
    - `when_missing : WhenMissing`
        what to do when a key is missing from the mapping -- this is what distinguishes this function from `map`
        you can choose from `"skip"`, `"include"` (without converting), and `"except"`
        (defaults to `"skip"`)

    # Returns:
    return type is one of:
    - `list[_AM_V]` if `when_missing` is `"skip"` or `"except"`
    - `list[Union[_AM_K, _AM_V]]` if `when_missing` is `"include"`

    # Raises:
    - `KeyError` : if the item is missing from the mapping and `when_missing` is `"except"`
    - `ValueError` : if `when_missing` is invalid

    """
    output: list[Union[_AM_K, _AM_V]] = []
    for item in iter:
        if item in mapping:
            # splice in every element the item maps to
            output.extend(mapping[item])
        elif when_missing == "skip":
            continue
        elif when_missing == "include":
            # keep the unmapped item itself, unconverted
            output.append(item)
        elif when_missing == "except":
            raise KeyError(f"item {item} is missing from mapping {mapping}")
        else:
            # only reachable once a missing key meets an unknown policy
            raise ValueError(
                f"invalid value for {when_missing = }\n{item = }\n{mapping = }"
            )
    return output
Given an iterable and a mapping, chain the mappings together.

Gotcha: if `when_missing` is invalid, this is totally fine until a missing key is actually encountered.

Note: you can use this with `muutils.kappa.Kappa` if you want to pass a function instead of a dict.
Parameters:
- `mapping : Mapping[_AM_K, Iterable[_AM_V]]` must have `__contains__` and `__getitem__`, both of which take `_AM_K` and the latter returns `Iterable[_AM_V]`
- `iter : Iterable[_AM_K]` the iterable to apply the mapping to
- `when_missing : WhenMissing` what to do when a key is missing from the mapping -- this is what distinguishes this function from `map`. You can choose from `"skip"`, `"include"` (without converting), and `"except"` (defaults to `"skip"`)
Returns:
return type is one of:
- `list[_AM_V]` if `when_missing` is `"skip"` or `"except"`
- `list[Union[_AM_K, _AM_V]]` if `when_missing` is `"include"`
Raises:
- `KeyError`: if the item is missing from the mapping and `when_missing` is `"except"`
- `ValueError`: if `when_missing` is invalid