Coverage for denofo/models.py: 97%

136 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-04-09 15:27 +0200

1import warnings 

2from typing import Self 

3from pydantic import ( 

4 BaseModel, 

5 Field, 

6 ConfigDict, 

7 model_validator, 

8 field_validator, 

9 ValidationInfo, 

10) 

11from denofo.utils.ncbiTaxDBcheck import check_NCBI_taxDB 

12from denofo.choices import ( 

13 AnnotGenomeChoices, 

14 ORFChoices, 

15 GeneticContextChoices, 

16 InputDataChoices, 

17 TaxSpecificityChoices, 

18 HomologyDBChoices, 

19 ThresholdChoices, 

20 SeqTypeChoices, 

21 AnchorChoices, 

22 TranslationEvidenceChoices, 

23) 

24 

25 

class TaxIDTranslation(UserWarning):
    """Warning category for taxon ID <-> taxon name translations.

    Emitted via ``warnings.warn`` whenever a taxon entry is translated to a
    taxon ID or a taxon name (or could not be translated and is used as is).
    """

32 

33 

class ModelValidError(ValueError):
    """Error raised when a model-level validation fails.

    Being a distinct ``ValueError`` subclass, it can be told apart from the
    plain ``ValueError`` raised by field validators; this is needed so that
    these failures can be ignored when only field validators are checked.
    """

41 

42 

class AnnotGenome(BaseModel):
    """Selected annotated-genome options.

    ``annotGenomeChoice`` holds the chosen options and defaults to an empty
    list.
    """

    annotGenomeChoice: list[AnnotGenomeChoices] = Field(default_factory=list)

    @field_validator("annotGenomeChoice")
    @classmethod
    def annotGenomeChoice_unknown(
        cls, annotGenomeChoice: list[AnnotGenomeChoices], info: ValidationInfo
    ) -> list[AnnotGenomeChoices]:
        """Reject ``unknown`` when it is combined with any other entry.

        Returns the list unchanged when it is acceptable.

        Raises:
            ValueError: if ``unknown`` is present and the list has more than
                one entry.
        """
        contains_unknown = AnnotGenomeChoices.unknown in annotGenomeChoice
        if contains_unknown and len(annotGenomeChoice) > 1:
            raise ValueError(
                "Unknown is not allowed in combination with other choices."
            )
        return annotGenomeChoice

59 

60 

class Transcriptome(BaseModel):
    """Transcriptome input description.

    Every field is optional: list-valued choice fields default to an empty
    list, all other fields default to ``None``.
    """

    expressionLevel: float | None = Field(default=None)
    transContextChoice: list[GeneticContextChoices] = Field(default_factory=list)
    customGeneticContext: list[str] | None = Field(default=None)
    transORFChoice: list[ORFChoices] = Field(default_factory=list)
    customORF: list[str] | None = Field(default=None)
    transcriptomeInfo: str | None = Field(default=None)

    @field_validator("transORFChoice")
    @classmethod
    def transORFChoice_exclude(
        cls, transORFChoice: list[ORFChoices], info: ValidationInfo
    ) -> list[ORFChoices]:
        """Enforce the mutual-exclusion rules between ORF choices.

        Returns the list unchanged when it is acceptable.

        Raises:
            ValueError: if more than one of ``highestKoz``, ``longestORF`` and
                ``firstORF`` is selected, or if ``noORF`` / ``allORF`` is
                combined with any other entry.
        """
        mutually_exclusive = {
            ORFChoices.highestKoz,
            ORFChoices.longestORF,
            ORFChoices.firstORF,
        }
        if len(mutually_exclusive.intersection(transORFChoice)) > 1:
            raise ValueError(
                "The options 'highest Kozac', 'longest ORF' and 'start first ORF'"
                " exclude each other."
            )

        several_selected = len(transORFChoice) > 1

        if ORFChoices.noORF in transORFChoice and several_selected:
            raise ValueError("noORF is not allowed in combination with other choices.")

        if ORFChoices.allORF in transORFChoice and several_selected:
            raise ValueError("allORF is not allowed in combination with other choices.")

        return transORFChoice

    @model_validator(mode="after")
    def customGeneticContext_required(self) -> Self:
        """Require ``customGeneticContext`` when the CUSTOM context is selected.

        Raises:
            ModelValidError: if ``transContextChoice`` contains CUSTOM while
                ``customGeneticContext`` is ``None``.
        """
        custom_selected = (
            self.transContextChoice
            and GeneticContextChoices.CUSTOM in self.transContextChoice
        )
        if custom_selected and self.customGeneticContext is None:
            raise ModelValidError(
                "customGeneticContext is required when transContextChoice is custom."
            )
        return self

    @model_validator(mode="after")
    def customORFChoice_required(self) -> Self:
        """Require ``customORF`` when the CUSTOM ORF choice is selected.

        Raises:
            ModelValidError: if ``transORFChoice`` contains CUSTOM while
                ``customORF`` is ``None``.
        """
        custom_selected = (
            self.transORFChoice and ORFChoices.CUSTOM in self.transORFChoice
        )
        if custom_selected and self.customORF is None:
            raise ModelValidError("customORF is required when ORFChoices is CUSTOM")
        return self

114 

115 

class TaxonID(BaseModel):
    """Taxon given either as a numeric taxon ID or as a taxon name.

    ``taxID`` defaults to an empty string. Supplied values are checked against
    the taxonomy database object returned by ``check_NCBI_taxDB()``.
    """

    taxID: str | int = Field(default="")

    @field_validator("taxID")
    @classmethod
    def taxID_valid(cls, taxID: str | int, info: ValidationInfo) -> str | int:
        """Normalise and check a taxon entry against the taxonomy database.

        * A string made up only of decimal digits is converted to ``int``.
        * Any other string is looked up as a taxon name. When found, the first
          matching taxon ID is returned and a ``TaxIDTranslation`` warning
          reports the translation. When not found, the string is returned
          unchanged and a ``TaxIDTranslation`` warning says so.
        * An integer is looked up as a taxon ID. When found, it is returned
          unchanged and a ``TaxIDTranslation`` warning reports its name.

        Returns:
            The resolved taxon ID, or the original string if the name could
            not be translated.

        Raises:
            ValueError: if an integer taxon ID is not present in the database.
        """
        entry = taxID
        ncbi = check_NCBI_taxDB()

        # str.isdecimal() is used instead of str.isdigit(): isdigit() also
        # accepts characters such as superscript digits ("²") that int()
        # cannot parse, which would surface as an unrelated "invalid literal"
        # ValueError instead of going through the name lookup below.
        if isinstance(taxID, str) and taxID.isdecimal():
            taxID = int(taxID)

        if isinstance(taxID, str):
            try:
                # On KeyError the assignment never happens, so taxID keeps the
                # original name for the "using as is" path.
                taxID = ncbi.get_name_translator([taxID])[taxID][0]
            except KeyError:
                warnings.warn(
                    f"{taxID} not found in NCBI Taxonomy Database! Using as is.",
                    TaxIDTranslation,
                )
            else:
                warnings.warn(
                    f"{entry} was translated to taxon ID {taxID}.",
                    TaxIDTranslation,
                )

        elif isinstance(taxID, int):
            try:
                taxname = ncbi.get_taxid_translator([taxID])[taxID]
            except KeyError as err:
                raise ValueError(
                    "TaxonID not found in NCBI Taxonomy Database!"
                ) from err
            warnings.warn(
                f"{taxID} translates to {taxname}. Using as is.",
                TaxIDTranslation,
            )

        return taxID

153 

154 

class PhylogeneticTaxa(BaseModel):
    """Taxonomic specificity together with the taxon it refers to.

    ``taxSpecificity`` defaults to ``TaxSpecificityChoices.lineageSpecif`` and
    ``taxonID`` defaults to a ``TaxonID`` built without arguments.
    """

    taxSpecificity: TaxSpecificityChoices = Field(
        default=TaxSpecificityChoices.lineageSpecif,
    )
    # NOTE(review): this default instance is created once, when the class body
    # is evaluated; confirm that is intended before moving to default_factory.
    taxonID: TaxonID = Field(default=TaxonID())

160 

161 

class HomologyFilter(BaseModel):
    """Homology-filter settings.

    List-valued choice fields default to an empty list; all other fields
    default to ``None``. Each ``custom*`` field carries the free-text values
    that go with the CUSTOM entry of the corresponding choice list.
    """

    phylogeneticTaxa: PhylogeneticTaxa | None = Field(default=None)
    seqType: list[SeqTypeChoices] = Field(default_factory=list)
    customSeqType: list[str] | None = Field(default=None)
    structuralSimilarity: str | None = Field(default=None)
    threshold: list[ThresholdChoices] = Field(default_factory=list)
    customThreshold: list[str] | None = Field(default=None)
    thresholdValue: list[float] = Field(default_factory=list)
    dataBase: list[HomologyDBChoices] = Field(default_factory=list)
    customDB: list[str] | None = Field(default=None)

    @model_validator(mode="after")
    def customs_required(self) -> Self:
        """Check the CUSTOM-dependent fields and the threshold value count.

        The checks run in this order: sequence type, threshold, number of
        threshold values, database.

        Raises:
            ModelValidError: if a choice list contains CUSTOM while its
                ``custom*`` field is ``None``, or if ``thresholdValue`` does
                not have the expected number of entries.
        """
        # Expected number of threshold values: with custom thresholds, one
        # entry of `threshold` is replaced by the custom entries.
        # NOTE(review): the "- 1" is applied whenever customThreshold is
        # non-empty, even if CUSTOM is not in `threshold` -- confirm intended.
        if self.customThreshold:
            expected_values = len(self.threshold) - 1 + len(self.customThreshold)
        else:
            expected_values = len(self.threshold)

        seq_custom = self.seqType and SeqTypeChoices.CUSTOM in self.seqType
        if seq_custom and self.customSeqType is None:
            raise ModelValidError(
                "customSeqType is required when SeqTypeChoices is CUSTOM"
            )

        threshold_custom = (
            self.threshold and ThresholdChoices.CUSTOM in self.threshold
        )
        if threshold_custom and self.customThreshold is None:
            raise ModelValidError(
                "customThreshold is required when ThresholdChoices is CUSTOM"
            )

        given_values = len(self.thresholdValue)
        if given_values != expected_values:
            raise ModelValidError(
                "Number of threshold values must match number of threshold choices."
                f" You got {given_values} threshold values"
                f" for {expected_values} threshold choices."
            )

        db_custom = self.dataBase and HomologyDBChoices.CUSTOM in self.dataBase
        if db_custom and self.customDB is None:
            raise ModelValidError(
                "customDB is required when HomologyDBChoices is CUSTOM"
            )

        return self

210 

211 

class SyntenySearch(BaseModel):
    """Synteny-search settings.

    ``anchors`` defaults to an empty list; ``customAnchor`` and
    ``softwareSyntenySearch`` default to ``None``.
    """

    anchors: list[AnchorChoices] = Field(default_factory=list)
    customAnchor: list[str] | None = Field(default=None)
    softwareSyntenySearch: list[str] | None = Field(default=None)

    @model_validator(mode="after")
    def customAnchor_required(self) -> Self:
        """Require ``customAnchor`` when the CUSTOM anchor is selected.

        Raises:
            ModelValidError: if ``anchors`` contains CUSTOM while
                ``customAnchor`` is ``None``.
        """
        custom_selected = self.anchors and AnchorChoices.CUSTOM in self.anchors
        if custom_selected and self.customAnchor is None:
            raise ModelValidError(
                "customAnchor is required when AnchorChoices is CUSTOM"
            )
        return self

228 

229 

class NonCodingHomologs(BaseModel):
    """Settings for non-coding homologs.

    ``enablingMutations`` defaults to ``False`` and ``synteny`` to ``None``.
    """

    enablingMutations: bool = Field(default=False)
    synteny: SyntenySearch | None = Field(default=None)

233 

234 

class EvolutionaryInformation(BaseModel):
    """Evolutionary information; ``selection`` is optional free text."""

    selection: str | None = Field(default=None)

237 

238 

class TranslationalEvidence(BaseModel):
    """Translation-evidence settings; both fields default to ``None``."""

    translationEvidence: list[TranslationEvidenceChoices] | None = Field(default=None)
    customTranslationEvidence: list[str] | None = Field(default=None)

    @model_validator(mode="after")
    def customTranslationEvidence_required(self) -> Self:
        """Require ``customTranslationEvidence`` when CUSTOM is selected.

        Raises:
            ModelValidError: if ``translationEvidence`` contains CUSTOM while
                ``customTranslationEvidence`` is ``None``.
        """
        # The truthiness test also covers the ``None`` default, so the
        # membership test only runs on a non-empty list.
        custom_selected = (
            self.translationEvidence
            and TranslationEvidenceChoices.CUSTOM in self.translationEvidence
        )
        if custom_selected and self.customTranslationEvidence is None:
            raise ModelValidError(
                "customTranslationEvidence is required when TranslationEvidenceChoices is CUSTOM"
            )
        return self

254 

255 

class DeNovoGeneAnnotation(BaseModel):
    """Top-level model combining all de novo gene annotation sections.

    ``inputData`` defaults to an empty list and every other field defaults to
    ``None``. Fields not declared here are rejected (``extra="forbid"``).
    """

    # NOTE(review): `exclude_none` does not appear to be a documented
    # ConfigDict key -- confirm it has any effect here.
    model_config = ConfigDict(
        extra="forbid",
        exclude_none=True,
        title="De Novo Gene Annotation",
    )

    inputData: list[InputDataChoices] = Field(default_factory=list)
    inputAnnotGenome: AnnotGenome | None = Field(default=None)
    inputTranscriptome: Transcriptome | None = Field(default=None)
    customInputData: str | None = Field(default=None)
    evolutionaryInformation: EvolutionaryInformation | None = Field(default=None)
    homologyFilter: HomologyFilter | None = Field(default=None)
    nonCodingHomologs: NonCodingHomologs | None = Field(default=None)
    translationalEvidence: TranslationalEvidence | None = Field(default=None)
    studyURL: list[str] | None = Field(default=None)

    @model_validator(mode="after")
    def inputDataType_required(self) -> Self:
        """Require the companion field of every selected input data type.

        Raises:
            ModelValidError: for the first entry among ANNOT_GENOME,
                TRANSCRIPTOME and CUSTOM (checked in that order) that is in
                ``inputData`` while its companion field is ``None``.
        """
        # (selected choice, companion field value, error message); the order
        # determines which error is reported first.
        requirements = (
            (
                InputDataChoices.ANNOT_GENOME,
                self.inputAnnotGenome,
                "AnnotGenome is required when inputData contains ANNOT_GENOME",
            ),
            (
                InputDataChoices.TRANSCRIPTOME,
                self.inputTranscriptome,
                "Transcriptome is required when inputData contains TRANSCRIPTOME",
            ),
            (
                InputDataChoices.CUSTOM,
                self.customInputData,
                "customInputData is required if inputData contains CUSTOM",
            ),
        )
        for choice, companion, message in requirements:
            if choice in self.inputData and companion is None:
                raise ModelValidError(message)
        return self