Coverage for src/extratools_core/seq/subseq.py: 0%
62 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-04 06:07 -0700
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-04 06:07 -0700
1import math
2from collections.abc import Callable, Iterable, Sequence
3from functools import cache
4from itertools import chain, starmap
6from toolz.itertoolz import count
8from ..typing import Comparable
9from . import iter_to_seq
12def best_subseq[T](
13 a: Iterable[T],
14 key: Callable[[Iterable[T]], Comparable],
15) -> Iterable[T]:
16 s: Sequence = iter_to_seq(a)
18 return max(
19 chain([[]], (
20 s[i:j]
21 for i in range(len(s))
22 for j in range(i + 1, len(s) + 1)
23 )),
24 key=key,
25 )
28def common_subseq[T](a: Iterable[T], b: Iterable[T]) -> Iterable[T]:
29 @cache
30 # Find the start pos in list `a`
31 def align_rec(alen: int, blen: int) -> int:
32 if alen == 0 or blen == 0 or aseq[alen - 1] != bseq[blen - 1]:
33 return alen
35 return align_rec(alen - 1, blen - 1)
37 aseq: Sequence[T] = iter_to_seq(a)
38 bseq: Sequence[T] = iter_to_seq(b)
40 for k in range(*max(
41 (
42 (align_rec(i, j), i)
43 for i in range(len(aseq) + 1)
44 for j in range(len(bseq) + 1)
45 ),
46 key=lambda x: x[1] - x[0],
47 )):
48 yield aseq[k]
51def is_subseq[T](a: Iterable[T], b: Iterable[T]) -> bool:
52 aseq: Sequence[T] = iter_to_seq(a)
53 return count(common_subseq(aseq, b)) == len(aseq)
56def best_subseq_with_gaps[T](
57 a: Iterable[T],
58 key: Callable[[Iterable[T]], Comparable],
59) -> Iterable[T]:
60 def find(alen: int) -> tuple[Comparable, list[T]]:
61 if alen == 0:
62 return (key([]), [])
64 prevcost: Comparable
65 prevseq: list[T]
66 prevcost, prevseq = find(alen - 1)
68 currseq: list[T] = [*prevseq, b[alen - 1]]
70 return max(
71 (prevcost, prevseq),
72 (key(currseq), currseq),
73 key=lambda x: x[0],
74 )
76 b: Sequence[T] = iter_to_seq(a)
77 return find(len(b))[1]
80def common_subseq_with_gaps[T](a: Iterable[T], b: Iterable[T]) -> Iterable[T]:
81 alignment: tuple[float, tuple[Iterable[T | None], Iterable[T | None]]] | None = align(a, b)
82 if alignment is None:
83 # Alignment cannot be `None` as we do not have cost bound
84 raise RuntimeError
86 return (
87 x
88 for x, y in zip(
89 *(alignment[1]),
90 strict=False,
91 )
92 # Actually x and y cannot both be None
93 if x is not None and x == y
94 )
97def is_subseq_with_gaps[T](a: Iterable[T], b: Iterable[T]) -> bool:
98 aseq: Sequence[T] = iter_to_seq(a)
99 return count(common_subseq_with_gaps(aseq, b)) == len(aseq)
102def align[T](
103 a: Iterable[T],
104 b: Iterable[T],
105 *,
106 cost: Callable[[T, T], float] | None = None,
107 bound: float = math.inf,
108 default: T = None,
109) -> tuple[float, tuple[Iterable[T | None], Iterable[T | None]]] | None:
110 def merge(
111 prev: tuple[float, tuple[Sequence[T | None], Sequence[T | None]]] | None,
112 curr: tuple[T, T],
113 ) -> tuple[float, tuple[Sequence[T | None], Sequence[T | None]]] | None:
114 if prev is None:
115 return None
117 prevcost: float
118 u: Sequence[T | None]
119 v: Sequence[T | None]
120 prevcost, (u, v) = prev
121 x: T
122 y: T
123 x, y = curr
125 currcost: float = prevcost + costfunc(x, y)
126 if currcost > bound:
127 return None
129 return currcost, ([*u, x], [*v, y])
131 @cache
132 def align_rec(alen: int, blen: int) -> tuple[
133 float,
134 tuple[Sequence[T | None], Sequence[T | None]],
135 ] | None:
136 if alen == 0 or blen == 0:
137 res: tuple[Sequence[T], Sequence[T]] = (
138 [default] * blen, bseq[:blen],
139 ) if alen == 0 else (
140 aseq[:alen], [default] * alen,
141 )
143 return (
144 sum(starmap(costfunc, zip(*res, strict=False))),
145 res,
146 )
148 return min(
149 (
150 merge(align_rec(alen - 1, blen), (aseq[alen - 1], default)),
151 merge(align_rec(alen, blen - 1), (default, bseq[blen - 1])),
152 merge(align_rec(alen - 1, blen - 1), (aseq[alen - 1], bseq[blen - 1])),
153 ),
154 key=lambda x: x[0] if x else math.inf,
155 default=None,
156 )
158 def default_cost(x: T, y: T) -> float:
159 return 0 if x == y else 1
161 aseq: Sequence[T] = iter_to_seq(a)
162 bseq: Sequence[T] = iter_to_seq(b)
164 costfunc: Callable[[T, T], float] = cost or default_cost
166 return align_rec(len(aseq), len(bseq))