Coverage for src/extratools_core/seq/subseq.py: 0%
55 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-05 23:54 -0700
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-05 23:54 -0700
1from collections.abc import Callable, Iterable, Sequence
2from functools import cache
3from itertools import chain, combinations
5from . import iter_to_seq
8def enumerate_subseqs[T](seq: Iterable[T]) -> Iterable[Sequence[T]]:
9 seq = iter_to_seq(seq)
10 seq_len: int = len(seq)
12 for i in range(seq_len):
13 for j in range(i + 1, seq_len + 1 if i > 0 else seq_len):
14 yield seq[i:j]
17def enumerate_subseqs_with_gaps[T](seq: Iterable[T]) -> Iterable[Sequence[T]]:
18 seq = iter_to_seq(seq)
20 for i in range(1, len(seq)):
21 yield from combinations(seq, i)
24def best_subseq[T](
25 a: Iterable[T],
26 score_func: Callable[[Iterable[T]], float],
27) -> Sequence[T]:
28 return max(
29 chain([[]], enumerate_subseqs(a)),
30 key=score_func,
31 )
34def best_subseq_with_gaps[T](
35 a: Iterable[T],
36 score_func: Callable[[Iterable[T]], float],
37) -> Sequence[T]:
38 return max(
39 chain([[]], enumerate_subseqs_with_gaps(a)),
40 key=score_func,
41 )
44def common_subseq[T](a: Iterable[T], b: Iterable[T]) -> Iterable[T]:
45 @cache
46 # Find the start pos in `a` for longest common subseq aligned from right to left
47 # between `a[:alen]` and `b[:blen]`
48 def align_rec(alen: int, blen: int) -> int:
49 if alen == 0 or blen == 0 or aseq[alen - 1] != bseq[blen - 1]:
50 return alen
52 return align_rec(alen - 1, blen - 1)
54 aseq: Sequence[T] = iter_to_seq(a)
55 bseq: Sequence[T] = iter_to_seq(b)
57 for k in range(*max(
58 (
59 (align_rec(i, j), i)
60 for i in range(len(aseq) + 1)
61 for j in range(len(bseq) + 1)
62 ),
63 key=lambda x: x[1] - x[0],
64 )):
65 yield aseq[k]
68def is_subseq[T](a: Iterable[T], b: Iterable[T]) -> bool:
69 aseq: Sequence[T] = iter_to_seq(a)
70 bseq: Sequence[T] = iter_to_seq(b)
72 if len(aseq) > len(bseq):
73 return False
75 return any(
76 aseq == bseq[j:j + len(aseq)]
77 for j in range(len(bseq) - len(aseq) + 1)
78 )
81def common_subseq_with_gaps[T](a: Iterable[T], b: Iterable[T]) -> Iterable[T]:
82 alignment: tuple[Iterable[T | None], Iterable[T | None]] = align(a, b)
84 return (
85 x
86 for x, y in zip(
87 *alignment,
88 strict=True,
89 )
90 if x is not None and y is not None
91 )
94def is_subseq_with_gaps[T](a: Iterable[T], b: Iterable[T]) -> bool:
95 alignment: tuple[Iterable[T | None], Iterable[T | None]] = align(a, b)
97 return all(
98 y is not None
99 for y in alignment[1]
100 )
103def align[T](
104 a: Iterable[T],
105 b: Iterable[T],
106 *,
107 default: T | None = None,
108) -> tuple[Iterable[T | None], Iterable[T | None]]:
109 def merge(
110 prev: tuple[int, tuple[Sequence[T | None], Sequence[T | None]]],
111 curr: tuple[T | None, T | None],
112 ) -> tuple[int, tuple[Sequence[T | None], Sequence[T | None]]]:
113 prev_matches: int
114 u: Sequence[T | None]
115 v: Sequence[T | None]
116 prev_matches, (u, v) = prev
118 x: T | None
119 y: T | None
120 x, y = curr
122 return (prev_matches + 1) if x == y else prev_matches, ([*u, x], [*v, y])
124 @cache
125 def align_rec(alen: int, blen: int) -> tuple[
126 int,
127 tuple[Sequence[T | None], Sequence[T | None]],
128 ]:
129 if alen == 0:
130 return 0, (
131 [default] * blen, bseq[:blen],
132 )
133 if blen == 0:
134 return 0, (
135 aseq[:alen], [default] * alen,
136 )
138 return max(
139 (
140 merge(align_rec(alen - 1, blen), (aseq[alen - 1], default)),
141 merge(align_rec(alen, blen - 1), (default, bseq[blen - 1])),
142 merge(align_rec(alen - 1, blen - 1), (aseq[alen - 1], bseq[blen - 1])),
143 ),
144 key=lambda x: x[0],
145 )
147 aseq: Sequence[T] = iter_to_seq(a)
148 bseq: Sequence[T] = iter_to_seq(b)
150 return align_rec(len(aseq), len(bseq))[1]