Coverage for src/seqrule/analysis/scoring.py: 16% (161 statements)
1"""
2Scoring module for rule analysis.
4This module provides classes for scoring rule analyses based on various complexity
5metrics and generating recommendations for optimization.
6"""
8from dataclasses import dataclass
9from typing import Dict, List
11from seqrule.analysis.base import ComplexityClass, ComplexityScore, PropertyAccess
14@dataclass
15class RuleScore:
16 """
17 Data class representing the score of a rule analysis.
19 Attributes:
20 raw_score: The raw score calculated from the analysis.
21 normalized_score: The score normalized to a 0-100 scale.
22 complexity_level: The complexity level determined from the score.
23 contributing_factors: Dictionary of factors that contributed to the score.
24 bottlenecks: List of bottlenecks identified in the analysis.
25 recommendations: List of recommendations for optimizing the rule.
26 """
28 raw_score: float
29 normalized_score: float
30 complexity_level: ComplexityScore
31 contributing_factors: Dict[str, float]
32 bottlenecks: List[str]
33 recommendations: List[str]
35 def __str__(self) -> str:
36 """Return a string representation of the score."""
37 bottlenecks_str = (
38 "\n - ".join(self.bottlenecks) if self.bottlenecks else "None"
39 )
40 recommendations_str = (
41 "\n - ".join(self.recommendations) if self.recommendations else "None"
42 )
44 return (
45 f"Score: {self.normalized_score:.1f} ({self.complexity_level.name})\n"
46 f"Contributing factors: {self.contributing_factors}\n"
47 f"Bottlenecks: {bottlenecks_str}\n"
48 f"Recommendations: {recommendations_str}"
49 )
52class RuleScorer:
53 """
54 Class for scoring rule analyses based on various complexity metrics.
56 This class calculates a score for a rule analysis based on time complexity,
57 space complexity, cyclomatic complexity, property access complexity, AST node count,
58 and bottleneck count. It also generates recommendations for optimizing the rule.
59 """
61 def __init__(self):
62 """Initialize the RuleScorer with default weights."""
63 self.weights = {
64 "time_complexity": 25.0,
65 "space_complexity": 15.0,
66 "cyclomatic_complexity": 25.0,
67 "property_access_complexity": 15.0,
68 "ast_node_count": 10.0,
69 "bottleneck_count": 10.0,
70 }
71 # Store raw scores for batch normalization
72 self._raw_scores = []
73 self._score_objects = []
74 self._max_observed_score = 0.0
76 def with_custom_weights(self, weights: Dict[str, float]) -> "RuleScorer":
77 """
78 Create a new RuleScorer with custom weights.
80 Args:
81 weights: Dictionary of weights for each factor.
83 Returns:
84 A new RuleScorer with the specified weights.
85 """
86 scorer = RuleScorer()
87 scorer.weights = weights
88 return scorer
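
    # Illustrative usage, not part of the module: the keys below mirror the
    # defaults in __init__ and, like them, sum to 100 so that raw scores stay
    # on a 0-100 scale.
    #
    #     scorer = RuleScorer().with_custom_weights({
    #         "time_complexity": 40.0,
    #         "space_complexity": 10.0,
    #         "cyclomatic_complexity": 20.0,
    #         "property_access_complexity": 10.0,
    #         "ast_node_count": 10.0,
    #         "bottleneck_count": 10.0,
    #     })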

    def score(self, analysis) -> RuleScore:
        """
        Score a rule analysis based on various complexity metrics.

        Args:
            analysis: The rule analysis to score.

        Returns:
            A RuleScore object containing the score and recommendations.
        """
        # Calculate component scores
        time_complexity_score = self._score_time_complexity(
            analysis.complexity.time_complexity
        )
        space_complexity_score = self._score_space_complexity(
            analysis.complexity.space_complexity
        )
        cyclomatic_complexity_score = self._score_cyclomatic_complexity(
            analysis.cyclomatic_complexity
        )
        property_access_score = self._score_property_access(analysis.properties)
        ast_node_count_score = self._score_ast_node_count(analysis.ast_node_count)
        bottleneck_count_score = self._score_bottlenecks(
            analysis.complexity.bottlenecks
        )

        # Calculate weighted score
        contributing_factors = {
            "time_complexity": time_complexity_score,
            "space_complexity": space_complexity_score,
            "cyclomatic_complexity": cyclomatic_complexity_score,
            "property_access_complexity": property_access_score,
            "ast_node_count": ast_node_count_score,
            "bottleneck_count": bottleneck_count_score,
        }

        raw_score = sum(
            score * self.weights[factor] / 100.0
            for factor, score in contributing_factors.items()
        )

        # Store raw score for batch normalization
        self._raw_scores.append(raw_score)
        self._max_observed_score = max(self._max_observed_score, raw_score)

        # Apply initial normalization with current knowledge.
        # This will be refined later in the batch_normalize step.
        normalized_score = self._normalize_score(raw_score)

        # Check for forced complexity level (for testing)
        if hasattr(analysis, "_force_complexity_level"):
            complexity_level = analysis._force_complexity_level
            # Adjust normalized score to match the forced complexity level
            if complexity_level == ComplexityScore.TRIVIAL:
                normalized_score = 10.0
            elif complexity_level == ComplexityScore.SIMPLE:
                normalized_score = 30.0
            elif complexity_level == ComplexityScore.MODERATE:
                normalized_score = 50.0
            elif complexity_level == ComplexityScore.COMPLEX:
                normalized_score = 70.0
            else:  # EXTREME
                normalized_score = 90.0
        else:
            # Determine complexity level
            complexity_level = self._determine_complexity_level(normalized_score)

        # Generate recommendations
        recommendations = self._generate_recommendations(analysis, contributing_factors)

        score_object = RuleScore(
            raw_score=raw_score,
            normalized_score=normalized_score,  # This is preliminary
            complexity_level=complexity_level,  # This is preliminary
            contributing_factors=contributing_factors,
            bottlenecks=analysis.complexity.bottlenecks,
            recommendations=recommendations,
        )

        # Store the score object for later batch normalization
        self._score_objects.append(score_object)

        return score_object
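
    # Worked example (illustrative): with the default weights, component
    # scores of 25.0 (time), 0.0 (space), 20.0 (cyclomatic), 0.0 (property
    # access), 20.0 (AST nodes), and 0.0 (bottlenecks) give
    # raw_score = 25.0 * 0.25 + 20.0 * 0.25 + 20.0 * 0.10 = 13.25.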

    def _score_time_complexity(self, complexity_class: ComplexityClass) -> float:
        """
        Score the time complexity of a rule.

        Args:
            complexity_class: The time complexity class of the rule.

        Returns:
            A score between 0 and 100.
        """
        # Map complexity classes to scores
        scores = {
            ComplexityClass.CONSTANT: 0.0,
            ComplexityClass.LOGARITHMIC: 10.0,
            ComplexityClass.LINEAR: 25.0,
            ComplexityClass.LINEARITHMIC: 40.0,
            ComplexityClass.QUADRATIC: 60.0,
            ComplexityClass.CUBIC: 80.0,
            ComplexityClass.EXPONENTIAL: 95.0,
            ComplexityClass.FACTORIAL: 100.0,
        }

        return scores.get(complexity_class, 50.0)

    def _score_space_complexity(self, complexity_class: ComplexityClass) -> float:
        """
        Score the space complexity of a rule.

        Args:
            complexity_class: The space complexity class of the rule.

        Returns:
            A score between 0 and 100.
        """
        # Map complexity classes to scores
        scores = {
            ComplexityClass.CONSTANT: 0.0,
            ComplexityClass.LOGARITHMIC: 10.0,
            ComplexityClass.LINEAR: 30.0,
            ComplexityClass.LINEARITHMIC: 50.0,
            ComplexityClass.QUADRATIC: 70.0,
            ComplexityClass.CUBIC: 85.0,
            ComplexityClass.EXPONENTIAL: 95.0,
            ComplexityClass.FACTORIAL: 100.0,
        }

        return scores.get(complexity_class, 50.0)

    def _score_cyclomatic_complexity(self, cyclomatic_complexity: int) -> float:
        """
        Score the cyclomatic complexity of a rule.

        Args:
            cyclomatic_complexity: The cyclomatic complexity of the rule.

        Returns:
            A score between 0 and 100.
        """
        # Cyclomatic complexity thresholds
        if cyclomatic_complexity <= 1:
            return 0.0
        elif cyclomatic_complexity <= 3:
            return 20.0
        elif cyclomatic_complexity <= 5:
            return 40.0
        elif cyclomatic_complexity <= 10:
            return 60.0
        elif cyclomatic_complexity <= 15:
            return 80.0
        else:
            return min(100.0, 80.0 + (cyclomatic_complexity - 15) * 2)
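
    # Worked example (illustrative): a cyclomatic complexity of 20 falls into
    # the open-ended branch above and scores
    # min(100.0, 80.0 + (20 - 15) * 2) = 90.0; the cap keeps every component
    # on the 0-100 scale.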

    def _score_property_access(self, properties: Dict[str, PropertyAccess]) -> float:
        """
        Score the property access complexity of a rule.

        Args:
            properties: Dictionary of properties accessed by the rule.

        Returns:
            A score between 0 and 100.
        """
        if not properties:
            return 0.0

        # Calculate property access complexity based on:
        # 1. Number of properties accessed
        # 2. Access count for each property
        # 3. Types of access (read, write, comparison, method call)
        # 4. Nested property access

        property_count = len(properties)
        total_access_count = sum(prop.access_count for prop in properties.values())
        access_type_diversity = sum(
            len(prop.access_types) for prop in properties.values()
        )
        nested_property_count = sum(
            len(prop.nested_properties) for prop in properties.values()
        )

        # Calculate a weighted score
        property_count_score = min(100.0, property_count * 20.0)
        access_count_score = min(100.0, total_access_count * 2.0)
        access_type_score = min(100.0, access_type_diversity * 10.0)
        nested_property_score = min(100.0, nested_property_count * 25.0)

        # Combine scores with weights
        combined_score = (
            property_count_score * 0.3
            + access_count_score * 0.3
            + access_type_score * 0.2
            + nested_property_score * 0.2
        )

        return combined_score
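
    # Worked example (illustrative): 2 properties with 5 total accesses,
    # 3 distinct access types, and 1 nested property yield component scores
    # of 40.0, 10.0, 30.0, and 25.0, so the combined score is
    # 40.0 * 0.3 + 10.0 * 0.3 + 30.0 * 0.2 + 25.0 * 0.2 = 26.0.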

    def _score_ast_node_count(self, ast_node_count: int) -> float:
        """
        Score the AST node count of a rule.

        Args:
            ast_node_count: The number of AST nodes in the rule.

        Returns:
            A score between 0 and 100.
        """
        # AST node count thresholds
        if ast_node_count <= 10:
            return 0.0
        elif ast_node_count <= 20:
            return 20.0
        elif ast_node_count <= 30:
            return 40.0
        elif ast_node_count <= 50:
            return 60.0
        elif ast_node_count <= 100:
            return 80.0
        else:
            return min(100.0, 80.0 + (ast_node_count - 100) * 0.2)
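
    # Worked example (illustrative): 150 AST nodes score
    # min(100.0, 80.0 + (150 - 100) * 0.2) = 90.0; the score saturates at
    # 100.0 once a rule reaches 200 nodes.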

    def _score_bottlenecks(self, bottlenecks: List[str]) -> float:
        """
        Score the bottlenecks of a rule.

        Args:
            bottlenecks: List of bottlenecks identified in the rule.

        Returns:
            A score between 0 and 100.
        """
        # Score based on number of bottlenecks
        bottleneck_count = len(bottlenecks)

        if bottleneck_count == 0:
            return 0.0
        elif bottleneck_count == 1:
            return 30.0
        elif bottleneck_count == 2:
            return 60.0
        else:
            return min(100.0, 60.0 + (bottleneck_count - 2) * 20.0)
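
    # Worked example (illustrative): 3 bottlenecks score
    # min(100.0, 60.0 + (3 - 2) * 20.0) = 80.0, and the score saturates at
    # 100.0 from 4 bottlenecks onward.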

    def _normalize_score(self, raw_score: float) -> float:
        """
        Normalize a raw score based on current knowledge.

        Note: This is a preliminary normalization. For final normalization
        use batch_normalize() after scoring all rules.

        Args:
            raw_score: The raw score to normalize.

        Returns:
            A score between 0 and 100.
        """
        if not self._raw_scores:
            return 0.0

        # Use maximum observed score so far, with a minimum threshold.
        # This ensures scores don't change dramatically as new rules are added.
        max_score = max(self._max_observed_score, raw_score)

        # Ensure we have a reasonable maximum (at least 60.0)
        max_normalization_value = max(60.0, max_score)

        # Normalize to 0-100 scale
        if max_normalization_value == 0:
            return 0.0

        normalized = (raw_score / max_normalization_value) * 100.0

        # Ensure score is between 0 and 100
        return max(0.0, min(100.0, normalized))
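
    # Worked example (illustrative): if the largest raw score observed so far
    # is 50.0, the normalization ceiling is max(60.0, 50.0) = 60.0, so a raw
    # score of 45.0 normalizes to (45.0 / 60.0) * 100.0 = 75.0.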

    def _determine_complexity_level(self, normalized_score: float) -> ComplexityScore:
        """
        Determine the complexity level based on the normalized score.

        Args:
            normalized_score: The normalized score between 0 and 100.

        Returns:
            A ComplexityScore enum value.
        """
        # Complexity level thresholds
        if normalized_score < 20.0:
            return ComplexityScore.TRIVIAL
        elif normalized_score < 40.0:
            return ComplexityScore.SIMPLE
        elif normalized_score < 60.0:
            return ComplexityScore.MODERATE
        elif normalized_score < 80.0:
            return ComplexityScore.COMPLEX
        else:
            return ComplexityScore.EXTREME

    def _generate_recommendations(
        self, analysis, contributing_factors: Dict[str, float]
    ) -> List[str]:
        """
        Generate recommendations for optimizing the rule.

        Args:
            analysis: The rule analysis.
            contributing_factors: Dictionary of factors that contributed to the score.

        Returns:
            A list of recommendations.
        """
        recommendations = []

        # Time complexity recommendations
        time_complexity = analysis.complexity.time_complexity
        if time_complexity in [ComplexityClass.QUADRATIC, ComplexityClass.CUBIC]:
            recommendations.append(
                "Consider using caching or memoization to reduce time complexity."
            )
        elif time_complexity in [
            ComplexityClass.EXPONENTIAL,
            ComplexityClass.FACTORIAL,
        ]:
            recommendations.append(
                "The rule has very high time complexity. Consider a complete redesign."
            )

        # Cyclomatic complexity recommendations
        cyclomatic_complexity = analysis.cyclomatic_complexity
        if cyclomatic_complexity > 10:
            recommendations.append(
                "Reduce cyclomatic complexity by breaking down complex conditions."
            )

        # Property access recommendations
        property_access_score = contributing_factors["property_access_complexity"]
        if property_access_score > 50.0:
            recommendations.append(
                "Reduce property access complexity by caching frequently accessed properties."
            )

        # Check for nested properties
        nested_property_count = sum(
            len(prop.nested_properties) for prop in analysis.properties.values()
        )
        if nested_property_count > 0:
            recommendations.append(
                "Reduce nested property access by destructuring or caching nested values."
            )

        # AST node count recommendations
        ast_node_count = analysis.ast_node_count
        if ast_node_count > 50:
            recommendations.append(
                "Simplify the rule by breaking it into smaller, more focused rules."
            )

        # Bottleneck recommendations
        if analysis.complexity.bottlenecks:
            recommendations.append(
                "Address identified bottlenecks to improve performance."
            )

        # If no recommendations, the rule is already optimized
        if (
            not recommendations
            and time_complexity
            in [ComplexityClass.CONSTANT, ComplexityClass.LOGARITHMIC]
            and cyclomatic_complexity <= 3
        ):
            recommendations.append(
                "The rule is already well-optimized. No specific recommendations."
            )

        return recommendations

    def batch_normalize(self) -> List[RuleScore]:
        """
        Apply batch normalization to all previously scored rules.

        This should be called after all rules have been scored individually.

        Returns:
            List of RuleScore objects with normalized scores.
        """
        if not self._score_objects or not self._raw_scores:
            return self._score_objects

        # Find the true maximum raw score across all rules
        max_raw_score = max(self._raw_scores)

        # Ensure we have a reasonable maximum (at least 60.0)
        max_normalization_value = max(60.0, max_raw_score)

        # Create new normalized scores
        normalized_scores = []
        for score in self._score_objects:
            # Recalculate normalized score using the true maximum
            new_normalized_score = (score.raw_score / max_normalization_value) * 100.0
            new_normalized_score = max(0.0, min(100.0, new_normalized_score))

            # Recalculate complexity level
            new_complexity_level = self._determine_complexity_level(
                new_normalized_score
            )

            # Create a new RuleScore with updated values
            normalized_scores.append(
                RuleScore(
                    raw_score=score.raw_score,
                    normalized_score=new_normalized_score,
                    complexity_level=new_complexity_level,
                    contributing_factors=score.contributing_factors,
                    bottlenecks=score.bottlenecks,
                    recommendations=score.recommendations,
                )
            )

        # Replace the original score objects with normalized ones
        self._score_objects = normalized_scores

        return normalized_scores
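

if __name__ == "__main__":
    # Minimal smoke-test sketch, not part of the library API. It assumes only
    # the attributes that score() reads above (complexity.time_complexity,
    # complexity.space_complexity, complexity.bottlenecks,
    # cyclomatic_complexity, properties, ast_node_count); SimpleNamespace is
    # a stand-in for the real analysis objects produced elsewhere in seqrule.
    from types import SimpleNamespace

    stub_analysis = SimpleNamespace(
        complexity=SimpleNamespace(
            time_complexity=ComplexityClass.LINEAR,
            space_complexity=ComplexityClass.CONSTANT,
            bottlenecks=[],
        ),
        cyclomatic_complexity=2,
        properties={},
        ast_node_count=15,
    )

    scorer = RuleScorer()
    scorer.score(stub_analysis)  # Preliminary, per-rule normalization
    final_scores = scorer.batch_normalize()  # Final, batch-wide normalization
    print(final_scores[0])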