docs for muutils v0.8.2
View Source on GitHub

muutils.misc.sequence


  1from __future__ import annotations
  2
  3from typing import (
  4    Iterable,
  5    Any,
  6    Generator,
  7    Callable,
  8    Union,
  9)
 10
 11import typing
 12from typing import (
 13    Literal,
 14    Mapping,
 15)
 16
 17
 18WhenMissing = Literal["except", "skip", "include"]
 19
 20
 21def empty_sequence_if_attr_false(
 22    itr: Iterable[Any],
 23    attr_owner: Any,
 24    attr_name: str,
 25) -> Iterable[Any]:
 26    """Returns `itr` if `attr_owner` has the attribute `attr_name` and it boolean casts to `True`. Returns an empty sequence otherwise.
 27
 28    Particularly useful for optionally inserting delimiters into a sequence depending on an `TokenizerElement` attribute.
 29
 30    # Parameters:
 31    - `itr: Iterable[Any]`
 32        The iterable to return if the attribute is `True`.
 33    - `attr_owner: Any`
 34        The object to check for the attribute.
 35    - `attr_name: str`
 36        The name of the attribute to check.
 37
 38    # Returns:
 39    - `itr: Iterable` if `attr_owner` has the attribute `attr_name` and it boolean casts to `True`, otherwise an empty sequence.
 40    - `()` an empty sequence if the attribute is `False` or not present.
 41    """
 42    return itr if bool(getattr(attr_owner, attr_name, False)) else ()
 43
 44
 45def flatten(it: Iterable[Any], levels_to_flatten: int | None = None) -> Generator:
 46    """
 47    Flattens an arbitrarily nested iterable.
 48    Flattens all iterable data types except for `str` and `bytes`.
 49
 50    # Returns
 51    Generator over the flattened sequence.
 52
 53    # Parameters
 54    - `it`: Any arbitrarily nested iterable.
 55    - `levels_to_flatten`: Number of levels to flatten by, starting at the outermost layer. If `None`, performs full flattening.
 56    """
 57    for x in it:
 58        # TODO: swap type check with more general check for __iter__() or __next__() or whatever
 59        if (
 60            hasattr(x, "__iter__")
 61            and not isinstance(x, (str, bytes))
 62            and (levels_to_flatten is None or levels_to_flatten > 0)
 63        ):
 64            yield from flatten(
 65                x, None if levels_to_flatten is None else levels_to_flatten - 1
 66            )
 67        else:
 68            yield x
 69
 70
 71# string-like operations on lists
 72# --------------------------------------------------------------------------------
 73
 74
 75def list_split(lst: list, val: Any) -> list[list]:
 76    """split a list into sublists by `val`. similar to "a_b_c".split("_")
 77
 78    ```python
 79    >>> list_split([1,2,3,0,4,5,0,6], 0)
 80    [[1, 2, 3], [4, 5], [6]]
 81    >>> list_split([0,1,2,3], 0)
 82    [[], [1, 2, 3]]
 83    >>> list_split([1,2,3], 0)
 84    [[1, 2, 3]]
 85    >>> list_split([], 0)
 86    [[]]
 87    ```
 88
 89    """
 90
 91    if len(lst) == 0:
 92        return [[]]
 93
 94    output: list[list] = [
 95        [],
 96    ]
 97
 98    for x in lst:
 99        if x == val:
100            output.append([])
101        else:
102            output[-1].append(x)
103    return output
104
105
def list_join(lst: list, factory: Callable) -> list:
    """add a *new* instance of `factory()` between each element of `lst`

    `factory` is called once per gap (so `len(lst) - 1` times, and never for an
    empty or single-element input), which means mutable separators are not shared.

    `lst` is only iterated once, so any iterable (not just a `list`) is accepted.

    ```python
    >>> list_join([1,2,3], lambda : 0)
    [1, 0, 2, 0, 3]
    >>> list_join([1,2,3], lambda: [time.sleep(0.1), time.time()][1])
    [1, 1600000000.0, 2, 1600000000.1, 3]
    ```

    # Parameters:
    - `lst: list`
        the elements to interleave separators into
    - `factory: Callable`
        zero-argument callable producing a separator

    # Returns:
    - `list` a new list; `lst` itself is not modified
    """
    output: list = []

    for x in lst:
        # `output` is non-empty from the second element onwards,
        # so a separator is inserted before every element except the first
        if output:
            output.append(factory())
        output.append(x)

    return output
129
130
131# applying mappings
132# --------------------------------------------------------------------------------
133
134_AM_K = typing.TypeVar("_AM_K")
135_AM_V = typing.TypeVar("_AM_V")
136
137
def apply_mapping(
    mapping: Mapping[_AM_K, _AM_V],
    iter: Iterable[_AM_K],
    when_missing: WhenMissing = "skip",
) -> list[Union[_AM_K, _AM_V]]:
    """Translate every item of an iterable through a mapping, with a configurable policy for unmapped items

    Gotcha: `when_missing` is only inspected when an item is absent from `mapping`,
    so an invalid value goes unnoticed until a missing key is actually encountered.

    Note: you can use this with `muutils.kappa.Kappa` if you want to pass a function instead of a dict

    # Parameters:
     - `mapping : Mapping[_AM_K, _AM_V]`
        must have `__contains__` and `__getitem__`, both of which take `_AM_K` and the latter returns `_AM_V`
     - `iter : Iterable[_AM_K]`
        the iterable to apply the mapping to
     - `when_missing : WhenMissing`
        what to do when a key is missing from the mapping -- this is what distinguishes this function from `map`.
        `"skip"` drops the item, `"include"` keeps it unconverted, `"except"` raises
       (defaults to `"skip"`)

    # Returns:
    return type is one of:
     - `list[_AM_V]` if `when_missing` is `"skip"` or `"except"`
     - `list[Union[_AM_K, _AM_V]]` if `when_missing` is `"include"`

    # Raises:
     - `KeyError` : if the item is missing from the mapping and `when_missing` is `"except"`
     - `ValueError` : if `when_missing` is invalid
    """
    translated: list[Union[_AM_K, _AM_V]] = []
    item: _AM_K
    for item in iter:
        if item in mapping:
            translated.append(mapping[item])
        elif when_missing == "include":
            # keep the unmapped item as-is
            translated.append(item)
        elif when_missing == "except":
            raise KeyError(f"item {item} is missing from mapping {mapping}")
        elif when_missing != "skip":
            # neither of the three known policies
            raise ValueError(
                f"invalid value for {when_missing = }\n{item = }\n{mapping = }"
            )
        # remaining case is "skip": drop the item silently
    return translated
185
186
def apply_mapping_chain(
    mapping: Mapping[_AM_K, Iterable[_AM_V]],
    iter: Iterable[_AM_K],
    when_missing: WhenMissing = "skip",
) -> list[Union[_AM_K, _AM_V]]:
    """Replace every item of an iterable by the values its mapping entry yields, concatenating everything into one flat list

    Gotcha: `when_missing` is only inspected when an item is absent from `mapping`,
    so an invalid value goes unnoticed until a missing key is actually encountered.

    Note: you can use this with `muutils.kappa.Kappa` if you want to pass a function instead of a dict

    # Parameters:
    - `mapping : Mapping[_AM_K, Iterable[_AM_V]]`
        must have `__contains__` and `__getitem__`, both of which take `_AM_K` and the latter returns `Iterable[_AM_V]`
    - `iter : Iterable[_AM_K]`
        the iterable to apply the mapping to
    - `when_missing : WhenMissing`
        what to do when a key is missing from the mapping -- this is what distinguishes this function from `map`.
        `"skip"` drops the item, `"include"` keeps it unconverted (as a single element), `"except"` raises
    (defaults to `"skip"`)

    # Returns:
    return type is one of:
     - `list[_AM_V]` if `when_missing` is `"skip"` or `"except"`
     - `list[Union[_AM_K, _AM_V]]` if `when_missing` is `"include"`

    # Raises:
    - `KeyError` : if the item is missing from the mapping and `when_missing` is `"except"`
    - `ValueError` : if `when_missing` is invalid

    """
    chained: list[Union[_AM_K, _AM_V]] = []
    item: _AM_K
    for item in iter:
        if item in mapping:
            # splice all mapped values in, rather than appending the iterable itself
            chained.extend(mapping[item])
        elif when_missing == "include":
            # keep the unmapped item as one element
            chained.append(item)
        elif when_missing == "except":
            raise KeyError(f"item {item} is missing from mapping {mapping}")
        elif when_missing != "skip":
            # neither of the three known policies
            raise ValueError(
                f"invalid value for {when_missing = }\n{item = }\n{mapping = }"
            )
        # remaining case is "skip": drop the item silently
    return chained

WhenMissing = typing.Literal['except', 'skip', 'include']
def empty_sequence_if_attr_false(itr: Iterable[Any], attr_owner: Any, attr_name: str) -> Iterable[Any]:
def empty_sequence_if_attr_false(
    itr: Iterable[Any],
    attr_owner: Any,
    attr_name: str,
) -> Iterable[Any]:
    """Pass `itr` through when a flag attribute of `attr_owner` is truthy, otherwise give back `()`.

    The flag is read with `getattr(attr_owner, attr_name, False)`, so a missing
    attribute is handled exactly like a falsy one.

    Particularly useful for optionally inserting delimiters into a sequence depending on a `TokenizerElement` attribute.

    # Parameters:
    - `itr: Iterable[Any]`
        The iterable handed back (the same object, not a copy) when the flag is truthy.
    - `attr_owner: Any`
        The object inspected for the flag.
    - `attr_name: str`
        The name of the attribute acting as the flag.

    # Returns:
    - `itr` if `attr_owner` has the attribute `attr_name` and it boolean casts to `True`
    - `()` an empty tuple if the attribute is falsy or not present
    """
    flag = getattr(attr_owner, attr_name, False)
    if flag:
        return itr
    return ()

Returns itr if attr_owner has the attribute attr_name and it boolean casts to True. Returns an empty sequence otherwise.

Particularly useful for optionally inserting delimiters into a sequence depending on a TokenizerElement attribute.

Parameters:

  • itr: Iterable[Any] The iterable to return if the attribute is True.
  • attr_owner: Any The object to check for the attribute.
  • attr_name: str The name of the attribute to check.

Returns:

  • itr: Iterable if attr_owner has the attribute attr_name and it boolean casts to True, otherwise an empty sequence.
  • () an empty sequence if the attribute is False or not present.
def flatten(it: Iterable[Any], levels_to_flatten: int | None = None) -> Generator:
46def flatten(it: Iterable[Any], levels_to_flatten: int | None = None) -> Generator:
47    """
48    Flattens an arbitrarily nested iterable.
49    Flattens all iterable data types except for `str` and `bytes`.
50
51    # Returns
52    Generator over the flattened sequence.
53
54    # Parameters
55    - `it`: Any arbitrarily nested iterable.
56    - `levels_to_flatten`: Number of levels to flatten by, starting at the outermost layer. If `None`, performs full flattening.
57    """
58    for x in it:
59        # TODO: swap type check with more general check for __iter__() or __next__() or whatever
60        if (
61            hasattr(x, "__iter__")
62            and not isinstance(x, (str, bytes))
63            and (levels_to_flatten is None or levels_to_flatten > 0)
64        ):
65            yield from flatten(
66                x, None if levels_to_flatten is None else levels_to_flatten - 1
67            )
68        else:
69            yield x

Flattens an arbitrarily nested iterable. Flattens all iterable data types except for str and bytes.

Returns

Generator over the flattened sequence.

Parameters

  • it: Any arbitrarily nested iterable.
  • levels_to_flatten: Number of levels to flatten by, starting at the outermost layer. If None, performs full flattening.
def list_split(lst: list, val: Any) -> list[list]:
 76def list_split(lst: list, val: Any) -> list[list]:
 77    """split a list into sublists by `val`. similar to "a_b_c".split("_")
 78
 79    ```python
 80    >>> list_split([1,2,3,0,4,5,0,6], 0)
 81    [[1, 2, 3], [4, 5], [6]]
 82    >>> list_split([0,1,2,3], 0)
 83    [[], [1, 2, 3]]
 84    >>> list_split([1,2,3], 0)
 85    [[1, 2, 3]]
 86    >>> list_split([], 0)
 87    [[]]
 88    ```
 89
 90    """
 91
 92    if len(lst) == 0:
 93        return [[]]
 94
 95    output: list[list] = [
 96        [],
 97    ]
 98
 99    for x in lst:
100        if x == val:
101            output.append([])
102        else:
103            output[-1].append(x)
104    return output

split a list into sublists by val. similar to "a_b_c".split("_")

>>> list_split([1,2,3,0,4,5,0,6], 0)
[[1, 2, 3], [4, 5], [6]]
>>> list_split([0,1,2,3], 0)
[[], [1, 2, 3]]
>>> list_split([1,2,3], 0)
[[1, 2, 3]]
>>> list_split([], 0)
[[]]
def list_join(lst: list, factory: Callable) -> list:
def list_join(lst: list, factory: Callable) -> list:
    """add a *new* instance of `factory()` between each element of `lst`

    `factory` is called once per gap (so `len(lst) - 1` times, and never for an
    empty or single-element input), which means mutable separators are not shared.

    `lst` is only iterated once, so any iterable (not just a `list`) is accepted.

    ```python
    >>> list_join([1,2,3], lambda : 0)
    [1, 0, 2, 0, 3]
    >>> list_join([1,2,3], lambda: [time.sleep(0.1), time.time()][1])
    [1, 1600000000.0, 2, 1600000000.1, 3]
    ```

    # Parameters:
    - `lst: list`
        the elements to interleave separators into
    - `factory: Callable`
        zero-argument callable producing a separator

    # Returns:
    - `list` a new list; `lst` itself is not modified
    """
    output: list = []

    for x in lst:
        # `output` is non-empty from the second element onwards,
        # so a separator is inserted before every element except the first
        if output:
            output.append(factory())
        output.append(x)

    return output

add a new instance of factory() between each element of lst

>>> list_join([1,2,3], lambda : 0)
[1, 0, 2, 0, 3]
>>> list_join([1,2,3], lambda: [time.sleep(0.1), time.time()][1])
[1, 1600000000.0, 2, 1600000000.1, 3]
def apply_mapping( mapping: Mapping[~_AM_K, ~_AM_V], iter: Iterable[~_AM_K], when_missing: Literal['except', 'skip', 'include'] = 'skip') -> list[typing.Union[~_AM_K, ~_AM_V]]:
def apply_mapping(
    mapping: Mapping[_AM_K, _AM_V],
    iter: Iterable[_AM_K],
    when_missing: WhenMissing = "skip",
) -> list[Union[_AM_K, _AM_V]]:
    """Translate every item of an iterable through a mapping, with a configurable policy for unmapped items

    Gotcha: `when_missing` is only inspected when an item is absent from `mapping`,
    so an invalid value goes unnoticed until a missing key is actually encountered.

    Note: you can use this with `muutils.kappa.Kappa` if you want to pass a function instead of a dict

    # Parameters:
     - `mapping : Mapping[_AM_K, _AM_V]`
        must have `__contains__` and `__getitem__`, both of which take `_AM_K` and the latter returns `_AM_V`
     - `iter : Iterable[_AM_K]`
        the iterable to apply the mapping to
     - `when_missing : WhenMissing`
        what to do when a key is missing from the mapping -- this is what distinguishes this function from `map`.
        `"skip"` drops the item, `"include"` keeps it unconverted, `"except"` raises
       (defaults to `"skip"`)

    # Returns:
    return type is one of:
     - `list[_AM_V]` if `when_missing` is `"skip"` or `"except"`
     - `list[Union[_AM_K, _AM_V]]` if `when_missing` is `"include"`

    # Raises:
     - `KeyError` : if the item is missing from the mapping and `when_missing` is `"except"`
     - `ValueError` : if `when_missing` is invalid
    """
    translated: list[Union[_AM_K, _AM_V]] = []
    item: _AM_K
    for item in iter:
        if item in mapping:
            translated.append(mapping[item])
        elif when_missing == "include":
            # keep the unmapped item as-is
            translated.append(item)
        elif when_missing == "except":
            raise KeyError(f"item {item} is missing from mapping {mapping}")
        elif when_missing != "skip":
            # neither of the three known policies
            raise ValueError(
                f"invalid value for {when_missing = }\n{item = }\n{mapping = }"
            )
        # remaining case is "skip": drop the item silently
    return translated

Given an iterable and a mapping, apply the mapping to the iterable with certain options

Gotcha: if when_missing is invalid, this is totally fine until a missing key is actually encountered.

Note: you can use this with muutils.kappa.Kappa if you want to pass a function instead of a dict

Parameters:

  • mapping : Mapping[_AM_K, _AM_V] must have __contains__ and __getitem__, both of which take _AM_K and the latter returns _AM_V
  • iter : Iterable[_AM_K] the iterable to apply the mapping to
  • when_missing : WhenMissing what to do when a key is missing from the mapping -- this is what distinguishes this function from map. You can choose from "skip", "include" (without converting), and "except" (defaults to "skip").

Returns:

return type is one of:

  • list[_AM_V] if when_missing is "skip" or "except"
  • list[Union[_AM_K, _AM_V]] if when_missing is "include"

Raises:

  • KeyError : if the item is missing from the mapping and when_missing is "except"
  • ValueError : if when_missing is invalid
def apply_mapping_chain( mapping: Mapping[~_AM_K, Iterable[~_AM_V]], iter: Iterable[~_AM_K], when_missing: Literal['except', 'skip', 'include'] = 'skip') -> list[typing.Union[~_AM_K, ~_AM_V]]:
def apply_mapping_chain(
    mapping: Mapping[_AM_K, Iterable[_AM_V]],
    iter: Iterable[_AM_K],
    when_missing: WhenMissing = "skip",
) -> list[Union[_AM_K, _AM_V]]:
    """Replace every item of an iterable by the values its mapping entry yields, concatenating everything into one flat list

    Gotcha: `when_missing` is only inspected when an item is absent from `mapping`,
    so an invalid value goes unnoticed until a missing key is actually encountered.

    Note: you can use this with `muutils.kappa.Kappa` if you want to pass a function instead of a dict

    # Parameters:
    - `mapping : Mapping[_AM_K, Iterable[_AM_V]]`
        must have `__contains__` and `__getitem__`, both of which take `_AM_K` and the latter returns `Iterable[_AM_V]`
    - `iter : Iterable[_AM_K]`
        the iterable to apply the mapping to
    - `when_missing : WhenMissing`
        what to do when a key is missing from the mapping -- this is what distinguishes this function from `map`.
        `"skip"` drops the item, `"include"` keeps it unconverted (as a single element), `"except"` raises
    (defaults to `"skip"`)

    # Returns:
    return type is one of:
     - `list[_AM_V]` if `when_missing` is `"skip"` or `"except"`
     - `list[Union[_AM_K, _AM_V]]` if `when_missing` is `"include"`

    # Raises:
    - `KeyError` : if the item is missing from the mapping and `when_missing` is `"except"`
    - `ValueError` : if `when_missing` is invalid

    """
    chained: list[Union[_AM_K, _AM_V]] = []
    item: _AM_K
    for item in iter:
        if item in mapping:
            # splice all mapped values in, rather than appending the iterable itself
            chained.extend(mapping[item])
        elif when_missing == "include":
            # keep the unmapped item as one element
            chained.append(item)
        elif when_missing == "except":
            raise KeyError(f"item {item} is missing from mapping {mapping}")
        elif when_missing != "skip":
            # neither of the three known policies
            raise ValueError(
                f"invalid value for {when_missing = }\n{item = }\n{mapping = }"
            )
        # remaining case is "skip": drop the item silently
    return chained

Given an iterable and a mapping, chain the mappings together

Gotcha: if when_missing is invalid, this is totally fine until a missing key is actually encountered.

Note: you can use this with muutils.kappa.Kappa if you want to pass a function instead of a dict

Parameters:

  • mapping : Mapping[_AM_K, Iterable[_AM_V]] must have __contains__ and __getitem__, both of which take _AM_K and the latter returns Iterable[_AM_V]
  • iter : Iterable[_AM_K] the iterable to apply the mapping to
  • when_missing : WhenMissing what to do when a key is missing from the mapping -- this is what distinguishes this function from map. You can choose from "skip", "include" (without converting), and "except" (defaults to "skip").

Returns:

return type is one of:

  • list[_AM_V] if when_missing is "skip" or "except"
  • list[Union[_AM_K, _AM_V]] if when_missing is "include"

Raises:

  • KeyError : if the item is missing from the mapping and when_missing is "except"
  • ValueError : if when_missing is invalid