Coverage for src/extratools_core/seq/subseq.py: 0%

62 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-04 06:07 -0700

1import math 

2from collections.abc import Callable, Iterable, Sequence 

3from functools import cache 

4from itertools import chain, starmap 

5 

6from toolz.itertoolz import count 

7 

8from ..typing import Comparable 

9from . import iter_to_seq 

10 

11 

12def best_subseq[T]( 

13 a: Iterable[T], 

14 key: Callable[[Iterable[T]], Comparable], 

15) -> Iterable[T]: 

16 s: Sequence = iter_to_seq(a) 

17 

18 return max( 

19 chain([[]], ( 

20 s[i:j] 

21 for i in range(len(s)) 

22 for j in range(i + 1, len(s) + 1) 

23 )), 

24 key=key, 

25 ) 

26 

27 

28def common_subseq[T](a: Iterable[T], b: Iterable[T]) -> Iterable[T]: 

29 @cache 

30 # Find the start pos in list `a` 

31 def align_rec(alen: int, blen: int) -> int: 

32 if alen == 0 or blen == 0 or aseq[alen - 1] != bseq[blen - 1]: 

33 return alen 

34 

35 return align_rec(alen - 1, blen - 1) 

36 

37 aseq: Sequence[T] = iter_to_seq(a) 

38 bseq: Sequence[T] = iter_to_seq(b) 

39 

40 for k in range(*max( 

41 ( 

42 (align_rec(i, j), i) 

43 for i in range(len(aseq) + 1) 

44 for j in range(len(bseq) + 1) 

45 ), 

46 key=lambda x: x[1] - x[0], 

47 )): 

48 yield aseq[k] 

49 

50 

51def is_subseq[T](a: Iterable[T], b: Iterable[T]) -> bool: 

52 aseq: Sequence[T] = iter_to_seq(a) 

53 return count(common_subseq(aseq, b)) == len(aseq) 

54 

55 

56def best_subseq_with_gaps[T]( 

57 a: Iterable[T], 

58 key: Callable[[Iterable[T]], Comparable], 

59) -> Iterable[T]: 

60 def find(alen: int) -> tuple[Comparable, list[T]]: 

61 if alen == 0: 

62 return (key([]), []) 

63 

64 prevcost: Comparable 

65 prevseq: list[T] 

66 prevcost, prevseq = find(alen - 1) 

67 

68 currseq: list[T] = [*prevseq, b[alen - 1]] 

69 

70 return max( 

71 (prevcost, prevseq), 

72 (key(currseq), currseq), 

73 key=lambda x: x[0], 

74 ) 

75 

76 b: Sequence[T] = iter_to_seq(a) 

77 return find(len(b))[1] 

78 

79 

80def common_subseq_with_gaps[T](a: Iterable[T], b: Iterable[T]) -> Iterable[T]: 

81 alignment: tuple[float, tuple[Iterable[T | None], Iterable[T | None]]] | None = align(a, b) 

82 if alignment is None: 

83 # Alignment cannot be `None` as we do not have cost bound 

84 raise RuntimeError 

85 

86 return ( 

87 x 

88 for x, y in zip( 

89 *(alignment[1]), 

90 strict=False, 

91 ) 

92 # Actually x and y cannot both be None 

93 if x is not None and x == y 

94 ) 

95 

96 

97def is_subseq_with_gaps[T](a: Iterable[T], b: Iterable[T]) -> bool: 

98 aseq: Sequence[T] = iter_to_seq(a) 

99 return count(common_subseq_with_gaps(aseq, b)) == len(aseq) 

100 

101 

102def align[T]( 

103 a: Iterable[T], 

104 b: Iterable[T], 

105 *, 

106 cost: Callable[[T, T], float] | None = None, 

107 bound: float = math.inf, 

108 default: T = None, 

109) -> tuple[float, tuple[Iterable[T | None], Iterable[T | None]]] | None: 

110 def merge( 

111 prev: tuple[float, tuple[Sequence[T | None], Sequence[T | None]]] | None, 

112 curr: tuple[T, T], 

113 ) -> tuple[float, tuple[Sequence[T | None], Sequence[T | None]]] | None: 

114 if prev is None: 

115 return None 

116 

117 prevcost: float 

118 u: Sequence[T | None] 

119 v: Sequence[T | None] 

120 prevcost, (u, v) = prev 

121 x: T 

122 y: T 

123 x, y = curr 

124 

125 currcost: float = prevcost + costfunc(x, y) 

126 if currcost > bound: 

127 return None 

128 

129 return currcost, ([*u, x], [*v, y]) 

130 

131 @cache 

132 def align_rec(alen: int, blen: int) -> tuple[ 

133 float, 

134 tuple[Sequence[T | None], Sequence[T | None]], 

135 ] | None: 

136 if alen == 0 or blen == 0: 

137 res: tuple[Sequence[T], Sequence[T]] = ( 

138 [default] * blen, bseq[:blen], 

139 ) if alen == 0 else ( 

140 aseq[:alen], [default] * alen, 

141 ) 

142 

143 return ( 

144 sum(starmap(costfunc, zip(*res, strict=False))), 

145 res, 

146 ) 

147 

148 return min( 

149 ( 

150 merge(align_rec(alen - 1, blen), (aseq[alen - 1], default)), 

151 merge(align_rec(alen, blen - 1), (default, bseq[blen - 1])), 

152 merge(align_rec(alen - 1, blen - 1), (aseq[alen - 1], bseq[blen - 1])), 

153 ), 

154 key=lambda x: x[0] if x else math.inf, 

155 default=None, 

156 ) 

157 

158 def default_cost(x: T, y: T) -> float: 

159 return 0 if x == y else 1 

160 

161 aseq: Sequence[T] = iter_to_seq(a) 

162 bseq: Sequence[T] = iter_to_seq(b) 

163 

164 costfunc: Callable[[T, T], float] = cost or default_cost 

165 

166 return align_rec(len(aseq), len(bseq))