Coverage for denofo/models.py: 97%
136 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-04-09 15:27 +0200
« prev ^ index » next coverage.py v7.6.12, created at 2025-04-09 15:27 +0200
1import warnings
2from typing import Self
3from pydantic import (
4 BaseModel,
5 Field,
6 ConfigDict,
7 model_validator,
8 field_validator,
9 ValidationInfo,
10)
11from denofo.utils.ncbiTaxDBcheck import check_NCBI_taxDB
12from denofo.choices import (
13 AnnotGenomeChoices,
14 ORFChoices,
15 GeneticContextChoices,
16 InputDataChoices,
17 TaxSpecificityChoices,
18 HomologyDBChoices,
19 ThresholdChoices,
20 SeqTypeChoices,
21 AnchorChoices,
22 TranslationEvidenceChoices,
23)
class TaxIDTranslation(UserWarning):
    """
    Warning raised when a taxon ID is translated to a taxon name or vice versa.
    """
class ModelValidError(ValueError):
    """
    Error raised when a model-level validation fails.

    The ``@model_validator`` methods in this module raise this type, while the
    field validators raise a plain ``ValueError``. Because it subclasses
    ``ValueError``, code catching ``ValueError`` still catches it.

    NOTE(review): the distinct type presumably lets callers ignore model
    validator failures when only field validators are being checked; confirm
    against the calling code.
    """
class AnnotGenome(BaseModel):
    # Selected annotated-genome choices; a fresh empty list by default.
    annotGenomeChoice: list[AnnotGenomeChoices] = Field(default_factory=list)

    @field_validator("annotGenomeChoice")
    @classmethod
    def annotGenomeChoice_unknown(
        cls, annotGenomeChoice: list[AnnotGenomeChoices], info: ValidationInfo
    ) -> list[AnnotGenomeChoices]:
        """Reject a selection that pairs ``unknown`` with further entries.

        Returns:
            The list unchanged when it passes the check.

        Raises:
            ValueError: If ``AnnotGenomeChoices.unknown`` is present and the
                list holds more than one entry.
        """
        has_unknown = AnnotGenomeChoices.unknown in annotGenomeChoice
        if has_unknown and len(annotGenomeChoice) > 1:
            raise ValueError(
                "Unknown is not allowed in combination with other choices."
            )
        return annotGenomeChoice
class Transcriptome(BaseModel):
    # All fields are optional: scalars default to None, choice lists to [].
    expressionLevel: float | None = Field(default=None)
    transContextChoice: list[GeneticContextChoices] = Field(default_factory=list)
    # Required by the model validator below when CUSTOM is selected.
    customGeneticContext: list[str] | None = Field(default=None)
    transORFChoice: list[ORFChoices] = Field(default_factory=list)
    # Required by the model validator below when CUSTOM is selected.
    customORF: list[str] | None = Field(default=None)
    transcriptomeInfo: str | None = Field(default=None)

    @field_validator("transORFChoice")
    @classmethod
    def transORFChoice_exclude(
        cls, transORFChoice: list[ORFChoices], info: ValidationInfo
    ) -> list[ORFChoices]:
        """Enforce the mutual-exclusion rules between ORF choices.

        Returns:
            The list unchanged when it passes every check.

        Raises:
            ValueError: If more than one of highestKoz / longestORF / firstORF
                is selected, or if noORF or allORF appears in a list with more
                than one entry.
        """
        exclusive = {
            ORFChoices.highestKoz,
            ORFChoices.longestORF,
            ORFChoices.firstORF,
        }
        if len(exclusive.intersection(transORFChoice)) > 1:
            raise ValueError(
                "The options 'highest Kozac', 'longest ORF' and 'start first ORF'"
                " exclude each other."
            )

        # noORF and allORF must each stand alone; noORF is checked first.
        standalone = (
            (
                ORFChoices.noORF,
                "noORF is not allowed in combination with other choices.",
            ),
            (
                ORFChoices.allORF,
                "allORF is not allowed in combination with other choices.",
            ),
        )
        for choice, message in standalone:
            if choice in transORFChoice and len(transORFChoice) > 1:
                raise ValueError(message)

        return transORFChoice

    @model_validator(mode="after")
    def customGeneticContext_required(self) -> Self:
        """Require ``customGeneticContext`` when the CUSTOM context is chosen.

        Raises:
            ModelValidError: If CUSTOM is in ``transContextChoice`` while
                ``customGeneticContext`` is None.
        """
        contexts = self.transContextChoice
        custom_selected = bool(contexts) and GeneticContextChoices.CUSTOM in contexts
        if custom_selected and self.customGeneticContext is None:
            raise ModelValidError(
                "customGeneticContext is required when transContextChoice is custom."
            )
        return self

    @model_validator(mode="after")
    def customORFChoice_required(self) -> Self:
        """Require ``customORF`` when the CUSTOM ORF choice is selected.

        Raises:
            ModelValidError: If CUSTOM is in ``transORFChoice`` while
                ``customORF`` is None.
        """
        orfs = self.transORFChoice
        custom_selected = bool(orfs) and ORFChoices.CUSTOM in orfs
        if custom_selected and self.customORF is None:
            raise ModelValidError("customORF is required when ORFChoices is CUSTOM")
        return self
class TaxonID(BaseModel):
    # Taxon given either as an integer ID or as a name; "" by default.
    taxID: str | int = Field(default="")

    @field_validator("taxID")
    @classmethod
    def taxID_valid(cls, taxID: str | int, info: ValidationInfo) -> str | int:
        """Check ``taxID`` against the taxonomy handle from ``check_NCBI_taxDB``.

        A string made only of decimal digits is converted to ``int`` first.
        Any other string is looked up as a name: on a hit the first matching
        taxon ID is returned, on a miss the string is kept as is. Both cases
        emit a ``TaxIDTranslation`` warning. An integer must be known to the
        database; its name is reported through a ``TaxIDTranslation`` warning.

        Returns:
            The integer taxon ID, or the original string when the name is
            not found.

        Raises:
            ValueError: If an integer ID is not found in the database.
        """
        entry = taxID
        # NOTE(review): presumably an ete3-style NCBITaxa handle; only
        # get_name_translator / get_taxid_translator are used here.
        ncbi = check_NCBI_taxDB()

        # isdecimal(), not isdigit(): isdigit() is also true for characters
        # such as superscript digits, which int() rejects with a ValueError.
        if isinstance(taxID, str) and taxID.isdecimal():
            taxID = int(taxID)

        if isinstance(taxID, str):
            try:
                name2taxid = ncbi.get_name_translator([taxID])
                translated = name2taxid[taxID][0]
            except KeyError:
                warnings.warn(
                    f"{taxID} not found in NCBI Taxonomy Database! Using as is.",
                    TaxIDTranslation,
                )
            else:
                taxID = translated
                warnings.warn(
                    f"{entry} was translated to taxon ID {taxID}.",
                    TaxIDTranslation,
                )

        elif isinstance(taxID, int):
            try:
                taxnames = ncbi.get_taxid_translator([taxID])
                taxname = taxnames[taxID]
            except KeyError as err:
                raise ValueError(
                    "TaxonID not found in NCBI Taxonomy Database!"
                ) from err
            warnings.warn(
                f"{taxID} translates to {taxname}. Using as is.",
                TaxIDTranslation,
            )

        return taxID
class PhylogeneticTaxa(BaseModel):
    # Taxonomic specificity; defaults to the ``lineageSpecif`` choice.
    taxSpecificity: TaxSpecificityChoices = Field(
        default=TaxSpecificityChoices.lineageSpecif
    )
    # The default TaxonID instance is built once, when this class body runs.
    # NOTE(review): the other models use default_factory for non-scalar
    # defaults; left as a plain default here to keep behaviour unchanged.
    taxonID: TaxonID = Field(default=TaxonID())
class HomologyFilter(BaseModel):
    # Scalars default to None, choice/value lists to fresh empty lists.
    phylogeneticTaxa: PhylogeneticTaxa | None = Field(default=None)
    seqType: list[SeqTypeChoices] = Field(default_factory=list)
    # Required by customs_required when seqType contains CUSTOM.
    customSeqType: list[str] | None = Field(default=None)
    structuralSimilarity: str | None = Field(default=None)
    threshold: list[ThresholdChoices] = Field(default_factory=list)
    # Required by customs_required when threshold contains CUSTOM.
    customThreshold: list[str] | None = Field(default=None)
    # Must hold one value per threshold choice (see customs_required).
    thresholdValue: list[float] = Field(default_factory=list)
    dataBase: list[HomologyDBChoices] = Field(default_factory=list)
    # Required by customs_required when dataBase contains CUSTOM.
    customDB: list[str] | None = Field(default=None)

    @model_validator(mode="after")
    def customs_required(self) -> Self:
        """Check custom companions and the threshold/value count.

        Checks run in this order: custom sequence type, custom threshold,
        number of threshold values, custom database.

        Raises:
            ModelValidError: If a CUSTOM choice is selected while its
                companion ``custom*`` field is None, or if the number of
                ``thresholdValue`` entries differs from the number of
                threshold choices.
        """
        if (
            self.seqType
            and SeqTypeChoices.CUSTOM in self.seqType
            and self.customSeqType is None
        ):
            raise ModelValidError(
                "customSeqType is required when SeqTypeChoices is CUSTOM"
            )

        custom_threshold_selected = (
            bool(self.threshold) and ThresholdChoices.CUSTOM in self.threshold
        )
        if custom_threshold_selected and self.customThreshold is None:
            raise ModelValidError(
                "customThreshold is required when ThresholdChoices is CUSTOM"
            )

        # The CUSTOM placeholder stands for the entries of customThreshold, so
        # it is swapped for them in the count. This is done only when CUSTOM
        # is actually selected; otherwise subtracting the placeholder would
        # miscount when stray customThreshold entries are supplied.
        num_threshold_choices = len(self.threshold)
        if custom_threshold_selected and self.customThreshold:
            num_threshold_choices += len(self.customThreshold) - 1

        if len(self.thresholdValue) != num_threshold_choices:
            raise ModelValidError(
                "Number of threshold values must match number of threshold "
                f"choices. You got {len(self.thresholdValue)} threshold values "
                f"for {num_threshold_choices} threshold choices."
            )

        if (
            self.dataBase
            and HomologyDBChoices.CUSTOM in self.dataBase
            and self.customDB is None
        ):
            raise ModelValidError(
                "customDB is required when HomologyDBChoices is CUSTOM"
            )
        return self
class SyntenySearch(BaseModel):
    anchors: list[AnchorChoices] = Field(default_factory=list)
    # Required by the model validator below when anchors contains CUSTOM.
    customAnchor: list[str] | None = Field(default=None)
    softwareSyntenySearch: list[str] | None = Field(default=None)

    @model_validator(mode="after")
    def customAnchor_required(self) -> Self:
        """Require ``customAnchor`` when the CUSTOM anchor is selected.

        Raises:
            ModelValidError: If CUSTOM is in ``anchors`` while
                ``customAnchor`` is None.
        """
        selected = self.anchors
        custom_selected = bool(selected) and AnchorChoices.CUSTOM in selected
        if custom_selected and self.customAnchor is None:
            raise ModelValidError(
                "customAnchor is required when AnchorChoices is CUSTOM"
            )
        return self
class NonCodingHomologs(BaseModel):
    # Boolean flag, False unless set explicitly.
    enablingMutations: bool = Field(default=False)
    # Optional nested synteny-search description; None when absent.
    synteny: SyntenySearch | None = Field(default=None)
class EvolutionaryInformation(BaseModel):
    # Optional free-text field; None when not provided.
    selection: str | None = Field(default=None)
class TranslationalEvidence(BaseModel):
    # Unlike the other choice lists in this module, this one defaults to None.
    translationEvidence: list[TranslationEvidenceChoices] | None = Field(default=None)
    # Required by the model validator below when CUSTOM is selected.
    customTranslationEvidence: list[str] | None = Field(default=None)

    @model_validator(mode="after")
    def customTranslationEvidence_required(self) -> Self:
        """Require ``customTranslationEvidence`` when CUSTOM is selected.

        Raises:
            ModelValidError: If CUSTOM is in ``translationEvidence`` while
                ``customTranslationEvidence`` is None.
        """
        evidence = self.translationEvidence
        custom_selected = (
            bool(evidence) and TranslationEvidenceChoices.CUSTOM in evidence
        )
        if custom_selected and self.customTranslationEvidence is None:
            raise ModelValidError(
                "customTranslationEvidence is required when "
                "TranslationEvidenceChoices is CUSTOM"
            )
        return self
class DeNovoGeneAnnotation(BaseModel):
    # extra="forbid" makes pydantic reject fields that are not declared here.
    # NOTE(review): confirm that `exclude_none` is a ConfigDict key recognised
    # by the pinned pydantic version.
    model_config = ConfigDict(
        extra="forbid",
        exclude_none=True,
        title="De Novo Gene Annotation",
    )

    # Selected input data kinds; each kind needs its companion field below
    # (enforced by inputDataType_required).
    inputData: list[InputDataChoices] = Field(default_factory=list)
    inputAnnotGenome: AnnotGenome | None = Field(default=None)
    inputTranscriptome: Transcriptome | None = Field(default=None)
    customInputData: str | None = Field(default=None)
    # Optional sections; None when absent.
    evolutionaryInformation: EvolutionaryInformation | None = Field(default=None)
    homologyFilter: HomologyFilter | None = Field(default=None)
    nonCodingHomologs: NonCodingHomologs | None = Field(default=None)
    translationalEvidence: TranslationalEvidence | None = Field(default=None)
    studyURL: list[str] | None = Field(default=None)

    @model_validator(mode="after")
    def inputDataType_required(self) -> Self:
        """Require the companion field for every selected input data kind.

        Checks run in this order: ANNOT_GENOME, TRANSCRIPTOME, CUSTOM.

        Raises:
            ModelValidError: If ``inputData`` contains ANNOT_GENOME,
                TRANSCRIPTOME or CUSTOM while ``inputAnnotGenome``,
                ``inputTranscriptome`` or ``customInputData`` respectively
                is None.
        """
        kinds = self.inputData

        genome_selected = InputDataChoices.ANNOT_GENOME in kinds
        if genome_selected and self.inputAnnotGenome is None:
            raise ModelValidError(
                "AnnotGenome is required when inputData contains ANNOT_GENOME"
            )

        transcriptome_selected = InputDataChoices.TRANSCRIPTOME in kinds
        if transcriptome_selected and self.inputTranscriptome is None:
            raise ModelValidError(
                "Transcriptome is required when inputData contains TRANSCRIPTOME"
            )

        custom_selected = InputDataChoices.CUSTOM in kinds
        if custom_selected and self.customInputData is None:
            raise ModelValidError(
                "customInputData is required if inputData contains CUSTOM"
            )
        return self