Coverage for isoforms.py : 92%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import warnings
2from typing import List, Generator, Iterable
3from elfragmentador.annotate import peptide_parser
4from elfragmentador.encoding_decoding import clip_explicit_terminus
class _unique_element:
    """Pair a distinct value with the number of copies still available.

    Used by the unique-permutation helpers; adapted from the answer at
    https://stackoverflow.com/questions/6284396
    """

    def __init__(self, value, occurrences):
        # `occurrences` is stored as a plain mutable attribute; the
        # permutation helper decrements and restores it while recursing.
        self.value, self.occurrences = value, occurrences
def perm_unique(elements: Iterable) -> Generator:
    """perm_unique Gets permutations of elements taking into account repeated.

    Part of the answer from https://stackoverflow.com/questions/6284396

    Permutes the elements passed but skips all permutations where elements are
    the same. For instance (0, 1, 0) would give 3 possibilities.

    Parameters
    ----------
    elements : Iterable
        Elements to be permuted. Any iterable is accepted (list, str,
        generator, ...); it is consumed once. The elements must be hashable
        because the distinct values are collected in a set.

    Returns
    -------
    Generator
        A generator that yields each distinct permutation as a tuple

    Examples
    --------
    >>> out = list(perm_unique("COM"))
    >>> sorted(out)
    [('C', 'M', 'O'), ('C', 'O', 'M'), ('M', 'C', 'O'), ('M', 'O', 'C'), ('O', 'C', 'M'), ('O', 'M', 'C')]
    >>> out = list(perm_unique("CCM"))
    >>> sorted(out)
    [('C', 'C', 'M'), ('C', 'M', 'C'), ('M', 'C', 'C')]
    >>> out = list(perm_unique([0,1,0]))
    >>> sorted(out)
    [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    >>> out = list(perm_unique(x for x in "CCM"))
    >>> sorted(out)
    [('C', 'C', 'M'), ('C', 'M', 'C'), ('M', 'C', 'C')]
    """
    # Materialise once: the input is annotated as Iterable, but counting the
    # repeats and sizing the result need a sequence (`.count` and `len`).
    items = list(elements)
    listunique = [_unique_element(value, items.count(value)) for value in set(items)]
    size = len(items)
    # The helper fills the scratch list from the last position down to 0.
    return _perm_unique_helper(listunique, [0] * size, size - 1)
def _perm_unique_helper(listunique, result_list, d):
    """Recursively fill ``result_list`` from index ``d`` down to 0.

    Part of the answer from https://stackoverflow.com/questions/6284396

    ``listunique`` holds objects exposing ``value`` and ``occurrences``.
    Every entry with copies left is placed at position ``d``; its counter is
    decremented while the lower positions are filled and restored afterwards.
    Once ``d`` drops below zero the current content of ``result_list`` is
    yielded as a tuple.
    """
    if d < 0:
        # All positions are filled: emit a snapshot of the scratch list.
        yield tuple(result_list)
        return

    for entry in listunique:
        if entry.occurrences <= 0:
            # No copies of this value left for the current branch.
            continue
        result_list[d] = entry.value
        entry.occurrences -= 1
        yield from _perm_unique_helper(listunique, result_list, d - 1)
        # Give the copy back so sibling branches can use it.
        entry.occurrences += 1
def _get_mod_isoforms(seq: str, mod: str, aas: str) -> List[str]:
    """Return the isoforms of ``seq`` obtained by shuffling one modification.

    Example arguments: ``mod = "PHOSPHO"``, ``seq = "S[PHOSPHO]AS"``,
    ``aas = "STY"``.

    The sequence is parsed into per-residue elements. Every element whose
    first character is one of ``aas`` becomes a ``{}`` slot, and whatever
    followed that first character in the parsed element is collected. The
    collected suffixes are then permuted over the slots. Duplicated results
    are dropped; the order of the returned list is not defined.
    """
    # Nothing to permute when the modification name does not occur at all.
    if mod not in seq:
        return [seq]

    residues = clip_explicit_terminus(list(peptide_parser(seq)))
    tag = f"[{mod}]"

    def _is_target(element):
        # True when the element's first character is one of the amino acids.
        first = element[:1]
        return any(first == aa for aa in aas)

    # Template with one "{}" slot after each targeted amino acid.
    template_parts = []
    for residue in residues:
        bare = residue.replace(tag, "")
        template_parts.append((bare[:1] + "{}") if _is_target(bare) else bare)
    template = "".join(template_parts)

    # What followed the amino-acid letter on each targeted element.
    suffixes = [residue[1:] for residue in residues if _is_target(residue)]

    if len(set(suffixes)) == 1:
        # Every slot carries the same suffix: a single arrangement exists.
        arrangements = [suffixes]
    else:
        arrangements = perm_unique(suffixes)

    return list({template.format(*arrangement) for arrangement in arrangements})
def get_mod_isoforms(seq: str, mods_list: List[str], aas_list: List[str]) -> List[str]:
    """get_mod_isoforms

    Gets modification isoforms for a peptide with modifications

    For each (modification, amino acids) pair, every sequence collected so
    far is expanded with the isoforms produced for that modification. The
    input sequence itself is always part of the result.

    Parameters
    ----------
    seq : str
        Sequence used
    mods_list : List[str]
        List of modification names that can be permuted
    aas_list : List[str]
        List of the aminoacids that can be modified by each modification.

    Details
    -------
    This function expects the modification and the aminoacid lists to be the
    same length. They are paired with ``zip``, so surplus entries in the
    longer list are ignored.

    A warning is emitted when one modification yields more than 10000
    distinct sequences; the result is not truncated.

    Returns
    -------
    List[str]
        The unique sequences, in no particular order.

    Example
    -------
    >>> seq = "M[OXIDATION]YPEPT[PHOSPHO]MIDES"
    >>> mods_list = ["PHOSPHO", "OXIDATION"]
    >>> aas_list = ["STY", "M"]
    >>> out = list(get_mod_isoforms(seq, mods_list, aas_list))
    >>> sorted(out)
    ['MYPEPTM[OXIDATION]IDES[PHOSPHO]', ... 'M[OXIDATION]Y[PHOSPHO]PEPTMIDES']
    """
    warn_threshold = 10000

    # A set keeps the working pool free of duplicates, so each sequence is
    # expanded once per modification instead of once per repeated copy.
    isoforms = {seq}

    for mod, aas in zip(mods_list, aas_list):
        generated = set()
        for current in isoforms:
            generated.update(_get_mod_isoforms(current, mod, aas))

        if len(generated) > warn_threshold:
            # NOTE(review): the previous message announced "clipping at 1k",
            # but the threshold was 10000 and nothing was ever clipped. The
            # output stays unclipped and the message now says so.
            warnings.warn(
                f"Large number of mod combinations found "
                f"(more than {warn_threshold}); results are not clipped"
            )

        # Merge only after the round so the pool is not mutated mid-iteration.
        isoforms |= generated

    return list(isoforms)