Coverage for src/seqrule/analysis/scoring.py: 16% (161 statements)
1""" 

2Scoring module for rule analysis. 

3 

4This module provides classes for scoring rule analyses based on various complexity 

5metrics and generating recommendations for optimization. 

6""" 

7 

8from dataclasses import dataclass 

9from typing import Dict, List 

10 

11from seqrule.analysis.base import ComplexityClass, ComplexityScore, PropertyAccess 

12 

13 

14@dataclass 

15class RuleScore: 

16 """ 

17 Data class representing the score of a rule analysis. 

18 

19 Attributes: 

20 raw_score: The raw score calculated from the analysis. 

21 normalized_score: The score normalized to a 0-100 scale. 

22 complexity_level: The complexity level determined from the score. 

23 contributing_factors: Dictionary of factors that contributed to the score. 

24 bottlenecks: List of bottlenecks identified in the analysis. 

25 recommendations: List of recommendations for optimizing the rule. 
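
    Example (illustrative construction; the field values shown are arbitrary):

        >>> score = RuleScore(
        ...     raw_score=12.5,
        ...     normalized_score=20.8,
        ...     complexity_level=ComplexityScore.SIMPLE,
        ...     contributing_factors={"time_complexity": 25.0},
        ...     bottlenecks=[],
        ...     recommendations=[],
        ... )
        >>> score.complexity_level.name
        'SIMPLE'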

26 """ 

27 

28 raw_score: float 

29 normalized_score: float 

30 complexity_level: ComplexityScore 

31 contributing_factors: Dict[str, float] 

32 bottlenecks: List[str] 

33 recommendations: List[str] 

34 

35 def __str__(self) -> str: 

36 """Return a string representation of the score.""" 

37 bottlenecks_str = ( 

38 "\n - ".join(self.bottlenecks) if self.bottlenecks else "None" 

39 ) 

40 recommendations_str = ( 

41 "\n - ".join(self.recommendations) if self.recommendations else "None" 

42 ) 

43 

44 return ( 

45 f"Score: {self.normalized_score:.1f} ({self.complexity_level.name})\n" 

46 f"Contributing factors: {self.contributing_factors}\n" 

47 f"Bottlenecks: {bottlenecks_str}\n" 

48 f"Recommendations: {recommendations_str}" 

49 ) 

50 

51 

52class RuleScorer: 

53 """ 

54 Class for scoring rule analyses based on various complexity metrics. 

55 

56 This class calculates a score for a rule analysis based on time complexity, 

57 space complexity, cyclomatic complexity, property access complexity, AST node count, 

58 and bottleneck count. It also generates recommendations for optimizing the rule. 
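
    Typical usage (sketch; ``analyses`` stands in for an iterable of rule
    analyses produced elsewhere, an assumption rather than a documented API):

        scorer = RuleScorer()
        preliminary = [scorer.score(analysis) for analysis in analyses]
        final_scores = scorer.batch_normalize()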

59 """ 

60 

61 def __init__(self): 

62 """Initialize the RuleScorer with default weights.""" 

63 self.weights = { 

64 "time_complexity": 25.0, 

65 "space_complexity": 15.0, 

66 "cyclomatic_complexity": 25.0, 

67 "property_access_complexity": 15.0, 

68 "ast_node_count": 10.0, 

69 "bottleneck_count": 10.0, 

70 } 

71 # Store raw scores for batch normalization 

72 self._raw_scores = [] 

73 self._score_objects = [] 

74 self._max_observed_score = 0.0 

75 

76 def with_custom_weights(self, weights: Dict[str, float]) -> "RuleScorer": 

77 """ 

78 Create a new RuleScorer with custom weights. 

79 

80 Args: 

81 weights: Dictionary of weights for each factor. 

82 

83 Returns: 

84 A new RuleScorer with the specified weights. 
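
        Example (illustrative; assumes the same factor names as the defaults,
        with weights expressed as percentages summing to 100):

            >>> scorer = RuleScorer().with_custom_weights({
            ...     "time_complexity": 40.0,
            ...     "space_complexity": 10.0,
            ...     "cyclomatic_complexity": 20.0,
            ...     "property_access_complexity": 10.0,
            ...     "ast_node_count": 10.0,
            ...     "bottleneck_count": 10.0,
            ... })
            >>> scorer.weights["time_complexity"]
            40.0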

85 """ 

86 scorer = RuleScorer() 

87 scorer.weights = weights 

88 return scorer 

89 

90 def score(self, analysis) -> RuleScore: 

91 """ 

92 Score a rule analysis based on various complexity metrics. 

93 

94 Args: 

95 analysis: The rule analysis to score. 

96 

97 Returns: 

98 A RuleScore object containing the score and recommendations. 
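
        Example (sketch; ``analysis`` is assumed to be a rule analysis object
        exposing ``complexity``, ``cyclomatic_complexity``, ``properties``,
        and ``ast_node_count``):

            result = scorer.score(analysis)
            print(result.normalized_score, result.complexity_level.name)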

99 """ 

100 # Calculate component scores 

101 time_complexity_score = self._score_time_complexity( 

102 analysis.complexity.time_complexity 

103 ) 

104 space_complexity_score = self._score_space_complexity( 

105 analysis.complexity.space_complexity 

106 ) 

107 cyclomatic_complexity_score = self._score_cyclomatic_complexity( 

108 analysis.cyclomatic_complexity 

109 ) 

110 property_access_score = self._score_property_access(analysis.properties) 

111 ast_node_count_score = self._score_ast_node_count(analysis.ast_node_count) 

112 bottleneck_count_score = self._score_bottlenecks( 

113 analysis.complexity.bottlenecks 

114 ) 

115 

116 # Calculate weighted score 

117 contributing_factors = { 

118 "time_complexity": time_complexity_score, 

119 "space_complexity": space_complexity_score, 

120 "cyclomatic_complexity": cyclomatic_complexity_score, 

121 "property_access_complexity": property_access_score, 

122 "ast_node_count": ast_node_count_score, 

123 "bottleneck_count": bottleneck_count_score, 

124 } 

125 

126 raw_score = sum( 

127 score * self.weights[factor] / 100.0 

128 for factor, score in contributing_factors.items() 

129 ) 

130 

131 # Store raw score for batch normalization 

132 self._raw_scores.append(raw_score) 

133 self._max_observed_score = max(self._max_observed_score, raw_score) 

134 

135 # Apply initial normalization with current knowledge 

136 # This will be refined later in the batch_normalize step 

137 normalized_score = self._normalize_score(raw_score) 

138 

139 # Check for forced complexity level (for testing) 

140 if hasattr(analysis, "_force_complexity_level"): 

141 complexity_level = analysis._force_complexity_level 

142 # Adjust normalized score to match the forced complexity level 

143 if complexity_level == ComplexityScore.TRIVIAL: 

144 normalized_score = 10.0 

145 elif complexity_level == ComplexityScore.SIMPLE: 

146 normalized_score = 30.0 

147 elif complexity_level == ComplexityScore.MODERATE: 

148 normalized_score = 50.0 

149 elif complexity_level == ComplexityScore.COMPLEX: 

150 normalized_score = 70.0 

151 else: # EXTREME 

152 normalized_score = 90.0 

153 else: 

154 # Determine complexity level 

155 complexity_level = self._determine_complexity_level(normalized_score) 

156 

157 # Generate recommendations 

158 recommendations = self._generate_recommendations(analysis, contributing_factors) 

159 

160 score_object = RuleScore( 

161 raw_score=raw_score, 

162 normalized_score=normalized_score, # This is preliminary 

163 complexity_level=complexity_level, # This is preliminary 

164 contributing_factors=contributing_factors, 

165 bottlenecks=analysis.complexity.bottlenecks, 

166 recommendations=recommendations, 

167 ) 

168 

169 # Store the score object for later batch normalization 

170 self._score_objects.append(score_object) 

171 

172 return score_object 

173 

174 def _score_time_complexity(self, complexity_class: ComplexityClass) -> float: 

175 """ 

176 Score the time complexity of a rule. 

177 

178 Args: 

179 complexity_class: The time complexity class of the rule. 

180 

181 Returns: 

182 A score between 0 and 100. 
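
        Example (illustrative; the value follows the mapping below):

            >>> RuleScorer()._score_time_complexity(ComplexityClass.LINEAR)
            25.0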

183 """ 

184 # Map complexity classes to scores 

185 scores = { 

186 ComplexityClass.CONSTANT: 0.0, 

187 ComplexityClass.LOGARITHMIC: 10.0, 

188 ComplexityClass.LINEAR: 25.0, 

189 ComplexityClass.LINEARITHMIC: 40.0, 

190 ComplexityClass.QUADRATIC: 60.0, 

191 ComplexityClass.CUBIC: 80.0, 

192 ComplexityClass.EXPONENTIAL: 95.0, 

193 ComplexityClass.FACTORIAL: 100.0, 

194 } 

195 

196 return scores.get(complexity_class, 50.0) 

197 

198 def _score_space_complexity(self, complexity_class: ComplexityClass) -> float: 

199 """ 

200 Score the space complexity of a rule. 

201 

202 Args: 

203 complexity_class: The space complexity class of the rule. 

204 

205 Returns: 

206 A score between 0 and 100. 
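
        Example (illustrative; note that space scoring penalizes LINEAR more
        than the time-complexity mapping does):

            >>> RuleScorer()._score_space_complexity(ComplexityClass.LINEAR)
            30.0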

207 """ 

208 # Map complexity classes to scores 

209 scores = { 

210 ComplexityClass.CONSTANT: 0.0, 

211 ComplexityClass.LOGARITHMIC: 10.0, 

212 ComplexityClass.LINEAR: 30.0, 

213 ComplexityClass.LINEARITHMIC: 50.0, 

214 ComplexityClass.QUADRATIC: 70.0, 

215 ComplexityClass.CUBIC: 85.0, 

216 ComplexityClass.EXPONENTIAL: 95.0, 

217 ComplexityClass.FACTORIAL: 100.0, 

218 } 

219 

220 return scores.get(complexity_class, 50.0) 

221 

222 def _score_cyclomatic_complexity(self, cyclomatic_complexity: int) -> float: 

223 """ 

224 Score the cyclomatic complexity of a rule. 

225 

226 Args: 

227 cyclomatic_complexity: The cyclomatic complexity of the rule. 

228 

229 Returns: 

230 A score between 0 and 100. 
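
        Example (illustrative; the values follow the thresholds below):

            >>> scorer = RuleScorer()
            >>> scorer._score_cyclomatic_complexity(1)
            0.0
            >>> scorer._score_cyclomatic_complexity(12)
            80.0
            >>> scorer._score_cyclomatic_complexity(20)
            90.0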

231 """ 

232 # Cyclomatic complexity thresholds 

233 if cyclomatic_complexity <= 1: 

234 return 0.0 

235 elif cyclomatic_complexity <= 3: 

236 return 20.0 

237 elif cyclomatic_complexity <= 5: 

238 return 40.0 

239 elif cyclomatic_complexity <= 10: 

240 return 60.0 

241 elif cyclomatic_complexity <= 15: 

242 return 80.0 

243 else: 

244 return min(100.0, 80.0 + (cyclomatic_complexity - 15) * 2) 

245 

246 def _score_property_access(self, properties: Dict[str, PropertyAccess]) -> float: 

247 """ 

248 Score the property access complexity of a rule. 

249 

250 Args: 

251 properties: Dictionary of properties accessed by the rule. 

252 

253 Returns: 

254 A score between 0 and 100. 
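
        Example (illustrative; constructing PropertyAccess objects is omitted
        here, so only the trivial empty case is shown):

            >>> RuleScorer()._score_property_access({})
            0.0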

255 """ 

256 if not properties: 

257 return 0.0 

258 

259 # Calculate property access complexity based on: 

260 # 1. Number of properties accessed 

261 # 2. Access count for each property 

262 # 3. Types of access (read, write, comparison, method call) 

263 # 4. Nested property access 

264 

265 property_count = len(properties) 

266 total_access_count = sum(prop.access_count for prop in properties.values()) 

267 access_type_diversity = sum( 

268 len(prop.access_types) for prop in properties.values() 

269 ) 

270 nested_property_count = sum( 

271 len(prop.nested_properties) for prop in properties.values() 

272 ) 

273 

274 # Calculate a weighted score 

275 property_count_score = min(100.0, property_count * 20.0) 

276 access_count_score = min(100.0, total_access_count * 2.0) 

277 access_type_score = min(100.0, access_type_diversity * 10.0) 

278 nested_property_score = min(100.0, nested_property_count * 25.0) 

279 

280 # Combine scores with weights 

281 combined_score = ( 

282 property_count_score * 0.3 

283 + access_count_score * 0.3 

284 + access_type_score * 0.2 

285 + nested_property_score * 0.2 

286 ) 

287 

288 return combined_score 

289 

290 def _score_ast_node_count(self, ast_node_count: int) -> float: 

291 """ 

292 Score the AST node count of a rule. 

293 

294 Args: 

295 ast_node_count: The number of AST nodes in the rule. 

296 

297 Returns: 

298 A score between 0 and 100. 
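
        Example (illustrative; the values follow the thresholds below):

            >>> RuleScorer()._score_ast_node_count(10)
            0.0
            >>> RuleScorer()._score_ast_node_count(120)
            84.0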

299 """ 

300 # AST node count thresholds 

301 if ast_node_count <= 10: 

302 return 0.0 

303 elif ast_node_count <= 20: 

304 return 20.0 

305 elif ast_node_count <= 30: 

306 return 40.0 

307 elif ast_node_count <= 50: 

308 return 60.0 

309 elif ast_node_count <= 100: 

310 return 80.0 

311 else: 

312 return min(100.0, 80.0 + (ast_node_count - 100) * 0.2) 

313 

314 def _score_bottlenecks(self, bottlenecks: List[str]) -> float: 

315 """ 

316 Score the bottlenecks of a rule. 

317 

318 Args: 

319 bottlenecks: List of bottlenecks identified in the rule. 

320 

321 Returns: 

322 A score between 0 and 100. 
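
        Example (illustrative; the bottleneck strings are hypothetical, only
        the count matters):

            >>> RuleScorer()._score_bottlenecks([])
            0.0
            >>> RuleScorer()._score_bottlenecks(
            ...     ["nested loop", "repeated property access", "deep recursion"]
            ... )
            80.0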

323 """ 

324 # Score based on number of bottlenecks 

325 bottleneck_count = len(bottlenecks) 

326 

327 if bottleneck_count == 0: 

328 return 0.0 

329 elif bottleneck_count == 1: 

330 return 30.0 

331 elif bottleneck_count == 2: 

332 return 60.0 

333 else: 

334 return min(100.0, 60.0 + (bottleneck_count - 2) * 20.0) 

335 

336 def _normalize_score(self, raw_score: float) -> float: 

337 """ 

338 Normalize a raw score based on current knowledge. 

339 Note: This is a preliminary normalization. For final normalization 

340 use batch_normalize() after scoring all rules. 

341 

342 Args: 

343 raw_score: The raw score to normalize. 

344 

345 Returns: 

346 A score between 0 and 100. 
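
        Example (illustrative; internal state is seeded by hand to mimic one
        previously scored rule):

            >>> scorer = RuleScorer()
            >>> scorer._raw_scores.append(30.0)
            >>> scorer._max_observed_score = 30.0
            >>> scorer._normalize_score(30.0)
            50.0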

347 """ 

348 if not self._raw_scores: 

349 return 0.0 

350 

351 # Use maximum observed score so far, with a minimum threshold 

352 # This ensures scores don't change dramatically as new rules are added 

353 max_score = max(self._max_observed_score, raw_score) 

354 

355 # Ensure we have a reasonable maximum (at least 60.0) 

356 max_normalization_value = max(60.0, max_score) 

357 

358 # Normalize to 0-100 scale 

359 if max_normalization_value == 0: 

360 return 0.0 

361 

362 normalized = (raw_score / max_normalization_value) * 100.0 

363 

364 # Ensure score is between 0 and 100 

365 return max(0.0, min(100.0, normalized)) 

366 

367 def _determine_complexity_level(self, normalized_score: float) -> ComplexityScore: 

368 """ 

369 Determine the complexity level based on the normalized score. 

370 

371 Args: 

372 normalized_score: The normalized score between 0 and 100. 

373 

374 Returns: 

375 A ComplexityScore enum value. 
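
        Example (illustrative; the values follow the thresholds below):

            >>> scorer = RuleScorer()
            >>> scorer._determine_complexity_level(15.0).name
            'TRIVIAL'
            >>> scorer._determine_complexity_level(55.0).name
            'MODERATE'
            >>> scorer._determine_complexity_level(85.0).name
            'EXTREME'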

376 """ 

377 # Complexity level thresholds 

378 if normalized_score < 20.0: 

379 return ComplexityScore.TRIVIAL 

380 elif normalized_score < 40.0: 

381 return ComplexityScore.SIMPLE 

382 elif normalized_score < 60.0: 

383 return ComplexityScore.MODERATE 

384 elif normalized_score < 80.0: 

385 return ComplexityScore.COMPLEX 

386 else: 

387 return ComplexityScore.EXTREME 

388 

389 def _generate_recommendations( 

390 self, analysis, contributing_factors: Dict[str, float] 

391 ) -> List[str]: 

392 """ 

393 Generate recommendations for optimizing the rule. 

394 

395 Args: 

396 analysis: The rule analysis. 

397 contributing_factors: Dictionary of factors that contributed to the score. 

398 

399 Returns: 

400 A list of recommendations. 

401 """ 

402 recommendations = [] 

403 

404 # Time complexity recommendations 

405 time_complexity = analysis.complexity.time_complexity 

406 if time_complexity in [ComplexityClass.QUADRATIC, ComplexityClass.CUBIC]: 

407 recommendations.append( 

408 "Consider using caching or memoization to reduce time complexity." 

409 ) 

410 elif time_complexity in [ 

411 ComplexityClass.EXPONENTIAL, 

412 ComplexityClass.FACTORIAL, 

413 ]: 

414 recommendations.append( 

415 "The rule has very high time complexity. Consider a complete redesign." 

416 ) 

417 

418 # Cyclomatic complexity recommendations 

419 cyclomatic_complexity = analysis.cyclomatic_complexity 

420 if cyclomatic_complexity > 10: 

421 recommendations.append( 

422 "Reduce cyclomatic complexity by breaking down complex conditions." 

423 ) 

424 

425 # Property access recommendations 

426 property_access_score = contributing_factors["property_access_complexity"] 

427 if property_access_score > 50.0: 

428 recommendations.append( 

429 "Reduce property access complexity by caching frequently accessed properties." 

430 ) 

431 

432 # Check for nested properties 

433 nested_property_count = sum( 

434 len(prop.nested_properties) for prop in analysis.properties.values() 

435 ) 

436 if nested_property_count > 0: 

437 recommendations.append( 

438 "Reduce nested property access by destructuring or caching nested values." 

439 ) 

440 

441 # AST node count recommendations 

442 ast_node_count = analysis.ast_node_count 

443 if ast_node_count > 50: 

444 recommendations.append( 

445 "Simplify the rule by breaking it into smaller, more focused rules." 

446 ) 

447 

448 # Bottleneck recommendations 

449 if analysis.complexity.bottlenecks: 

450 recommendations.append( 

451 "Address identified bottlenecks to improve performance." 

452 ) 

453 

454 # If no recommendations, the rule is already optimized 

455 if ( 

456 not recommendations 

457 and time_complexity 

458 in [ComplexityClass.CONSTANT, ComplexityClass.LOGARITHMIC] 

459 and cyclomatic_complexity <= 3 

460 ): 

461 recommendations.append( 

462 "The rule is already well-optimized. No specific recommendations." 

463 ) 

464 

465 return recommendations 

466 

467 def batch_normalize(self) -> List[RuleScore]: 

468 """ 

469 Apply batch normalization to all previously scored rules. 

470 This should be called after all rules have been scored individually. 

471 

472 Returns: 

473 List of RuleScore objects with normalized scores. 
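
        Typical usage (sketch; ``analyses`` is a hypothetical iterable of rule
        analyses):

            scorer = RuleScorer()
            for analysis in analyses:
                scorer.score(analysis)
            final_scores = scorer.batch_normalize()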

474 """ 

475 if not self._score_objects or not self._raw_scores: 

476 return self._score_objects 

477 

478 # Find the true maximum raw score across all rules 

479 max_raw_score = max(self._raw_scores) 

480 

481 # Ensure we have a reasonable maximum (at least 60.0) 

482 max_normalization_value = max(60.0, max_raw_score) 

483 

484 # Create new normalized scores 

485 normalized_scores = [] 

486 for score in self._score_objects: 

487 # Recalculate normalized score using the true maximum 

488 new_normalized_score = (score.raw_score / max_normalization_value) * 100.0 

489 new_normalized_score = max(0.0, min(100.0, new_normalized_score)) 

490 

491 # Recalculate complexity level 

492 new_complexity_level = self._determine_complexity_level( 

493 new_normalized_score 

494 ) 

495 

496 # Create a new RuleScore with updated values 

497 normalized_scores.append( 

498 RuleScore( 

499 raw_score=score.raw_score, 

500 normalized_score=new_normalized_score, 

501 complexity_level=new_complexity_level, 

502 contributing_factors=score.contributing_factors, 

503 bottlenecks=score.bottlenecks, 

504 recommendations=score.recommendations, 

505 ) 

506 ) 

507 

508 # Replace the original score objects with normalized ones 

509 self._score_objects = normalized_scores 

510 

511 return normalized_scores