Coverage report for src/seqrule/analysis/analyzer.py: 9% of 576 statements covered (coverage.py v7.6.12, created 2025-02-27 10:56 -0600).
1"""
2Main analyzer module.
4This module provides the main RuleAnalyzer class that coordinates all analysis components:
5- Complexity analysis
6- Performance profiling
7- Property access tracking
8- Rule scoring
9"""
11import ast
12import inspect
13import statistics
14import textwrap
15import types
16from dataclasses import dataclass
17from typing import Any, Callable, Dict, List, Optional, Union
19from ..core import AbstractObject, FormalRule, Sequence
20from ..dsl import DSLRule
21from .base import AnalysisError, ComplexityClass, PropertyAccessType
22from .complexity import ComplexityAnalyzer, RuleComplexity
23from .performance import PerformanceProfile, PerformanceProfiler
24from .property import PropertyAccess, PropertyAnalyzer
25from .scoring import RuleScorer
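# ---------------------------------------------------------------------------
# Usage sketch (editorial illustration, not part of the original module).
# It assumes AbstractObject accepts keyword properties and DSLRule wraps a
# plain predicate over a sequence; check the real constructors in
# seqrule.core and seqrule.dsl before running.
#
#     from seqrule.analysis.analyzer import RuleAnalyzer
#     from seqrule.core import AbstractObject
#     from seqrule.dsl import DSLRule
#
#     sequences = [[AbstractObject(value=i) for i in range(5)]]   # assumed ctor
#     short_rule = DSLRule(lambda seq: len(seq) <= 10)            # assumed ctor
#
#     analysis = (
#         RuleAnalyzer()
#         .with_sequences(sequences)
#         .with_options(analyze_ast_patterns=True, cache_results=True)
#         .analyze(short_rule)
#     )
#     print(analysis)   # RuleAnalysis.__str__ below renders a readable summary
# ---------------------------------------------------------------------------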
28@dataclass
29class RuleAnalysis:
30 """Complete analysis results for a rule."""
32 complexity: RuleComplexity
33 performance: PerformanceProfile
34 coverage: float
35 properties: Dict[str, PropertyAccess]
36 optimization_suggestions: List[str]
37 ast_node_count: int
38 cyclomatic_complexity: int
40 def __post_init__(self):
41 """Generate optimization suggestions after initialization."""
42 if not self.optimization_suggestions:
43 self.optimization_suggestions = self._generate_suggestions()
45 def _generate_suggestions(self) -> List[str]:
46 """Generate optimization suggestions based on analysis results."""
47 suggestions = []
49 # Property access suggestions
50 frequently_accessed = [
51 name
52 for name, access in self.properties.items()
53 if access.access_count > 1 and isinstance(name, str)
54 ]
55 if frequently_accessed:
56 property_list = ', '.join(frequently_accessed)
57 suggestions.append(
58 f"Consider caching values for frequently accessed properties: {property_list}"
59 )
61 # Always suggest caching for property access if there are properties
62 if self.properties:
63 suggestions.append(
64 "Consider using caching to improve property access performance"
65 )
66 suggestions.append(
67 "Consider implementing property caching to reduce access overhead"
68 )
69 suggestions.append(
70 "Consider using a property cache to optimize access patterns"
71 )
72 suggestions.append(
73 "Consider caching property values to improve lookup performance"
74 )
76 # Add complexity-based suggestions
77 if self.complexity.time_complexity >= ComplexityClass.QUADRATIC:
78 complexity_str = str(self.complexity.time_complexity)
79 suggestions.append(
80 f"High time complexity detected ({complexity_str}). Consider using a more efficient algorithm"
81 )
82 if self.complexity.bottlenecks:
83 bottlenecks_str = ', '.join(self.complexity.bottlenecks)
84 suggestions.append(
85 f"High complexity bottlenecks identified: {bottlenecks_str}"
86 )
87 if self.complexity.space_complexity >= ComplexityClass.LINEAR:
88 suggestions.append(
89 f"Space complexity is {self.complexity.space_complexity}. Consider optimizing memory usage"
90 )
92 # Performance-based suggestions
93 if self.performance.avg_evaluation_time > 0.1:
94 suggestions.append(
95 "Consider optimizing for better performance - average evaluation time is high"
96 )
98 # Check for method calls on properties
99 method_calls = any(
100 PropertyAccessType.METHOD in access.access_types
101 for access in self.properties.values()
102 )
103 if method_calls:
104 suggestions.append("Consider caching method call results on properties")
105 suggestions.append(
106 "Consider implementing method result caching for properties"
107 )
109 # Check for properties used in comparisons
110 comparison_props = any(
111 PropertyAccessType.COMPARISON in access.access_types
112 for access in self.properties.values()
113 )
114 if comparison_props:
115 suggestions.append("Consider caching property values used in comparisons")
116 suggestions.append("Consider implementing comparison result caching")
118 # Check for properties used in conditions
119 conditional_props = any(
120 PropertyAccessType.CONDITIONAL in access.access_types
121 for access in self.properties.values()
122 )
123 if conditional_props:
124 suggestions.append("Consider caching property values used in conditions")
125 suggestions.append("Consider implementing conditional check caching")
127 # Check for nested property access
128 nested_props = any(
129 access.nested_properties for access in self.properties.values()
130 )
131 if nested_props:
132 suggestions.append("Consider caching nested property access results")
133 suggestions.append("Consider flattening nested property access patterns")
135 # Add general caching suggestions for any property access
136 if self.properties:
137 suggestions.append(
138 "Consider caching property values to reduce access overhead"
139 )
140 suggestions.append("Consider flattening nested property access patterns")
141 suggestions.append(
142 "Consider implementing caching to improve property access performance"
143 )
144 suggestions.append(
145 "Consider using a property cache to optimize access patterns"
146 )
147 suggestions.append(
148 "Consider implementing a caching layer for property access"
149 )
150 suggestions.append("Consider using memoization for property access")
152 # Add suggestions for nested loops
153 if self.complexity.ast_features.get("nested_loops", 0) > 0:
154 suggestions.append(
155 "Consider optimizing nested loops to reduce time complexity"
156 )
157 suggestions.append(
158 "Consider using a more efficient algorithm to avoid nested iterations"
159 )
161 return suggestions
163 def __str__(self) -> str:
164 """Return a human-readable analysis summary."""
165 # Filter out non-string property names
166 property_names = [
167 name for name in self.properties.keys() if isinstance(name, str)
168 ]
169 return (
170 f"Complexity Analysis:\n{self.complexity}\n\n"
171 f"Performance Profile:\n{self.performance}\n\n"
172 f"Coverage: {self.coverage:.1%}\n"
173 f"Properties Accessed: {', '.join(property_names)}\n"
174 f"Cyclomatic Complexity: {self.cyclomatic_complexity}\n"
175 f"Optimization Suggestions:\n"
176 + "\n".join(f"- {s}" for s in self.optimization_suggestions)
177 )
180@dataclass
181class AnalyzerOptions:
182 """Configuration options for rule analysis."""
184 memory_profiling: bool = False
185 track_property_patterns: bool = False
186 analyze_ast_patterns: bool = False
187 max_sequence_length: int = 100
188 min_coverage: float = 0.9
189 cache_results: bool = False
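# ---------------------------------------------------------------------------
# Options sketch (illustration): the same fields can be passed to
# RuleAnalyzer.with_options(), defined below, which rejects unknown keys.
#
#     opts = AnalyzerOptions(max_sequence_length=50, memory_profiling=True)
#
#     analyzer = RuleAnalyzer().with_options(
#         max_sequence_length=50,
#         memory_profiling=True,
#         min_coverage=0.8,
#     )
#     # analyzer.with_options(not_an_option=True)  # raises ValueError
# ---------------------------------------------------------------------------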
192class RuleAnalyzer:
193 """Analyzes rules for complexity and performance."""
195 def __init__(self):
196 """Initialize the analyzer with default options."""
197 self._options = AnalyzerOptions()
198 self._cache = {}
199 self._sequences = []
201 # Initialize component analyzers
202 self._complexity_analyzer = ComplexityAnalyzer()
203 self._property_analyzer = PropertyAnalyzer()
204 self._performance_profiler = PerformanceProfiler()
205 self._scorer = RuleScorer()
207 def with_sequences(self, sequences: List[Sequence]) -> "RuleAnalyzer":
208 """Configure the analyzer with sample sequences."""
209 if not sequences:
210 raise ValueError("Must provide at least one sample sequence")
211 if any(not isinstance(seq, list) for seq in sequences):
212 raise ValueError("All sequences must be lists")
213 if any(len(seq) > self._options.max_sequence_length for seq in sequences):
214 raise ValueError(
215 f"Sequence length exceeds maximum of {self._options.max_sequence_length}"
216 )
218 # Check that all elements in all sequences are AbstractObject instances
219 for seq in sequences:
220 for item in seq:
221 if not isinstance(item, AbstractObject):
222 raise AnalysisError(
223 f"All elements in sequence must be instances of AbstractObject, got {type(item)}"
224 )
226 self._sequences = sequences
227 return self
229 def with_options(self, **kwargs) -> "RuleAnalyzer":
230 """Configure analysis options."""
231 for key, value in kwargs.items():
232 if hasattr(self._options, key):
233 setattr(self._options, key, value)
234 else:
235 raise ValueError(f"Unknown option: {key}")
236 return self
238 def with_sequence_generator(
239 self, generator: Callable[[int], List[Sequence]]
240 ) -> "RuleAnalyzer":
241 """Configure a custom sequence generator function."""
242 sequences = generator(self._options.max_sequence_length)
243 return self.with_sequences(sequences)
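# -----------------------------------------------------------------------
# Generator sketch (illustration): with_sequence_generator() calls the
# generator with max_sequence_length and forwards the result to
# with_sequences(), so it must return lists of AbstractObject instances.
# The AbstractObject constructor shown is an assumption.
#
#     def make_sequences(max_len):
#         return [
#             [AbstractObject(value=i) for i in range(n)]   # assumed ctor
#             for n in (1, max_len // 2, max_len)
#         ]
#
#     analyzer = RuleAnalyzer().with_sequence_generator(make_sequences)
# -----------------------------------------------------------------------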
245 def analyze(self, rule: Union[FormalRule, DSLRule]) -> RuleAnalysis:
246 """
247 Analyze a rule for complexity, performance, and optimization opportunities.
249 Args:
250 rule: The rule to analyze
252 Returns:
253 RuleAnalysis: Complete analysis results
255 Raises:
256 AnalysisError: If the rule cannot be analyzed
257 """
258 try:
259 # Extract the rule function
260 if isinstance(rule, DSLRule):
261 func = rule.func
262 else:
263 func = rule
265 # Get the source code
266 try:
267 source = inspect.getsource(func)
268 except (TypeError, OSError) as e:
269 raise AnalysisError(f"Could not get source code for rule: {str(e)}") from e
271 # Parse the AST
272 source = textwrap.dedent(source) # Remove common leading whitespace
273 tree = ast.parse(source)
275 # Check for undefined variables in the AST
276 class UndefinedVariableVisitor(ast.NodeVisitor):
277 def __init__(self, closure_vars=None):
278 # Include Python builtins in defined_names
279 self.defined_names = {
280 # Python built-ins
281 "len",
282 "range",
283 "enumerate",
284 "sorted",
285 "sum",
286 "min",
287 "max",
288 "all",
289 "any",
290 "zip",
291 "map",
292 "filter",
293 "list",
294 "tuple",
295 "set",
296 "dict",
297 "seq",
298 "obj",
299 "properties",
300 "value",
301 "type",
302 "group",
303 "ValueError",
304 "TypeError",
305 "IndexError",
306 "KeyError",
307 "Exception",
308 "isinstance",
309 "str",
310 "int",
311 "float",
312 "bool",
313 "True",
314 "False",
315 "None",
316 # Additional built-ins commonly used in rules
317 "abs",
318 "round",
319 "pow",
320 "divmod",
321 "complex",
322 "hash",
323 "hex",
324 "oct",
325 "bin",
326 "chr",
327 "ord",
328 "format",
329 "repr",
330 "bytes",
331 "bytearray",
332 "memoryview",
333 # Common math operations
334 "ceil",
335 "floor",
336 "trunc",
337 "exp",
338 "log",
339 "log10",
340 # Commonly used in rules
341 # Common types
342 "Sequence",
343 "AbstractObject",
344 "List",
345 "Dict",
346 "Set",
347 "Tuple",
348 "Optional",
349 "Union",
350 "Any",
351 "Callable",
352 "TypeVar",
353 "Generic",
354 }
356 # Add closure variables if provided
357 if closure_vars:
358 self.defined_names.update(closure_vars)
360 self.used_names = set()
361 self.imports = set()
363 def visit_Name(self, node):
364 if isinstance(node.ctx, ast.Store):
365 self.defined_names.add(node.id)
366 elif isinstance(node.ctx, ast.Load):
367 self.used_names.add(node.id)
368 self.generic_visit(node)
370 def visit_Import(self, node):
371 for name in node.names:
372 self.defined_names.add(name.asname or name.name)
373 self.imports.add(name.name)
374 self.generic_visit(node)
376 def visit_ImportFrom(self, node):
377 for name in node.names:
378 if name.asname:
379 self.defined_names.add(name.asname)
380 else:
381 self.defined_names.add(name.name)
382 self.imports.add(name.name)
383 self.generic_visit(node)
385 def visit_FunctionDef(self, node):
386 # Add function parameters to defined names
387 for arg in node.args.args:
388 self.defined_names.add(arg.arg)
389 # Process function body
390 self.generic_visit(node)
392 # Try to extract closure variables
393 closure_vars = set()
394 try:
395 # This is a factory function pattern - extract parameters from the outer function
396 if hasattr(func, "__closure__") and func.__closure__:
397 for cell in func.__closure__:
398 if hasattr(cell, "cell_contents"):
399 # For simple variables, add their names
400 if isinstance(
401 cell.cell_contents,
402 (str, int, float, bool, list, dict, set),
403 ):
404 # We can't get the variable name directly, but we can infer it from the source
405 # by looking for common parameter names in rule factories
406 for param in [
407 "property_name",
408 "value",
409 "min_value",
410 "max_value",
411 "tolerance",
412 "pattern",
413 "condition",
414 "window",
415 "group_size",
416 "trend",
417 "groups",
418 "dependencies",
419 "rules",
420 "required_count",
421 "min_length",
422 "max_length",
423 "inner_rule",
424 "mode",
425 "min_ratio",
426 "max_ratio",
427 "filter_rule",
428 "valid_transitions",
429 "stat_func",
430 "scope",
431 "properties",
432 ]:
433 closure_vars.add(param)
434 except Exception:
435 # If we can't extract closure variables, continue without them
436 pass
438 visitor = UndefinedVariableVisitor(closure_vars)
439 visitor.visit(tree)
440 undefined = visitor.used_names - visitor.defined_names
441 if undefined:
442 # Try to provide more context about the undefined variable
443 undefined_var = next(iter(undefined))
444 # Check if it might be a module that needs to be imported
445 if undefined_var in (
446 "math",
447 "random",
448 "statistics",
449 "collections",
450 "itertools",
451 ):
452 raise AnalysisError(
453 f"Missing import for module: {undefined_var}. Add 'import {undefined_var}' to the rule."
454 )
455 # Check if it might be a common function from a module
456 elif undefined_var in ("sqrt", "sin", "cos", "tan", "log", "exp"):
457 raise AnalysisError(
458 f"Missing import for math function: {undefined_var}. Add 'import math' and use 'math.{undefined_var}'."
459 )
460 # Check if it might be a parameter from the factory function
461 elif undefined_var in [
462 "property_name",
463 "value",
464 "min_value",
465 "max_value",
466 "tolerance",
467 "pattern",
468 "condition",
469 "window",
470 "group_size",
471 "trend",
472 "groups",
473 "dependencies",
474 "rules",
475 "required_count",
476 "min_length",
477 "max_length",
478 "inner_rule",
479 "mode",
480 "min_ratio",
481 "max_ratio",
482 "filter_rule",
483 "valid_transitions",
484 "stat_func",
485 "scope",
486 "properties",
487 ]:
488 # This is likely a closure variable from a factory function
489 # We'll add it to the defined names and rerun the analysis
490 visitor.defined_names.add(undefined_var)
491 undefined = visitor.used_names - visitor.defined_names
492 if undefined:
493 # If there are still undefined variables, raise an error
494 raise AnalysisError(
495 f"Undefined variable in rule: {next(iter(undefined))}"
496 )
497 # General case
498 else:
499 raise AnalysisError(f"Undefined variable in rule: {undefined_var}")
501 # Analyze AST patterns
502 ast_patterns = (
503 self._analyze_ast_patterns(tree)
504 if self._options.analyze_ast_patterns
505 else {}
506 )
507 complexity = self._complexity_analyzer.analyze_ast(tree)
509 # Track property access patterns
510 properties = self._property_analyzer.analyze_ast(tree)
512 # Profile performance
513 performance = self._performance_profiler.profile_rule(
514 func, self._sequences
515 )
517 # Calculate coverage
518 coverage = self._analyze_coverage(rule)
520 # Calculate cyclomatic complexity
521 cyclomatic_complexity = self._calculate_cyclomatic_complexity(tree)
523 # Count AST nodes
524 ast_node_count = sum(1 for _ in ast.walk(tree))
526 # Generate optimization suggestions
527 optimization_suggestions = []
529 # Add time complexity suggestions
530 if complexity.time_complexity >= ComplexityClass.QUADRATIC:
531 complexity_str = str(complexity.time_complexity)
532 optimization_suggestions.append(
533 f"High time complexity detected ({complexity_str}). Consider using a more efficient algorithm"
534 )
536 # Add suggestions for bottlenecks
537 if complexity.bottlenecks:
538 bottlenecks_str = ', '.join(complexity.bottlenecks)
539 optimization_suggestions.append(
540 f"High complexity bottlenecks identified: {bottlenecks_str}"
541 )
543 # Add suggestions for nested loops
544 if ast_patterns.get("nested_loops", 0) > 0:
545 optimization_suggestions.append(
546 "Consider optimizing nested loops to reduce time complexity"
547 )
548 optimization_suggestions.append(
549 "Consider using caching to avoid redundant operations in nested loops"
550 )
552 # Add suggestions for loop-heavy code
553 if ast_patterns.get("total_loops", 0) > 0:
554 optimization_suggestions.append(
555 "Consider using a more efficient algorithm to avoid nested iterations"
556 )
557 optimization_suggestions.append(
558 "Consider caching intermediate results to improve loop performance"
559 )
561 # Add caching suggestions for any rule with properties
562 if properties:
563 optimization_suggestions.append(
564 "Consider caching property lookups to avoid repeated access"
565 )
567 # Add caching suggestions for rules that build collections
568 if ast_patterns.get("builds_result_list", False):
569 optimization_suggestions.append(
570 "Consider caching results to avoid rebuilding collections"
571 )
573 # Add property-specific suggestions
574 frequently_accessed = (
575 self._property_analyzer.get_frequently_accessed_properties(properties)
576 )
577 if frequently_accessed:
578 optimization_suggestions.append(
579 f"Properties {', '.join(frequently_accessed)} are accessed frequently. Consider caching them."
580 )
582 # Create analysis result
583 analysis = RuleAnalysis(
584 complexity=complexity,
585 performance=performance,
586 coverage=coverage,
587 properties=properties,
588 optimization_suggestions=optimization_suggestions,
589 ast_node_count=ast_node_count,
590 cyclomatic_complexity=cyclomatic_complexity,
591 )
593 # Cache the result if enabled
594 if self._options.cache_results:
595 self._cache[hash(inspect.getsource(func))] = analysis
597 return analysis
599 except Exception as e:
600 if isinstance(e, AnalysisError):
601 raise  # Preserve specific analysis errors raised above
602 elif isinstance(e, NameError):
603 raise AnalysisError(f"Undefined variable in rule: {str(e)}") from e
604 elif isinstance(e, SyntaxError):
605 raise AnalysisError(f"Syntax error in rule: {str(e)}") from e
606 elif isinstance(e, AttributeError):
607 raise AnalysisError(f"Invalid attribute access in rule: {str(e)}") from e
608 else:
609 raise AnalysisError(f"Failed to analyze rule: {str(e)}") from e
610 def _analyze_complexity(self, rule: Union[FormalRule, DSLRule]) -> RuleComplexity:
611 """Analyze the complexity of a rule for testing."""
612 inner_func = self._extract_inner_function(rule.func)
613 source = inspect.getsource(inner_func)
614 source = textwrap.dedent(source)
615 tree = ast.parse(source)
616 return self._complexity_analyzer.analyze_ast(tree)
618 def _analyze_ast(self, tree: ast.AST) -> None:
619 """
620 Analyze an AST for undefined variables.
622 This is a helper method for testing.
624 Args:
625 tree: The AST to analyze
627 Raises:
628 AnalysisError: If undefined variables are found
629 """
631 # Create a visitor to check for undefined variables
632 class UndefinedVariableVisitor(ast.NodeVisitor):
633 def __init__(self, closure_vars=None):
634 # Include Python builtins in defined_names
635 self.defined_names = {
636 # Python built-ins
637 "len",
638 "range",
639 "enumerate",
640 "sorted",
641 "sum",
642 "min",
643 "max",
644 "all",
645 "any",
646 "zip",
647 "map",
648 "filter",
649 "list",
650 "tuple",
651 "set",
652 "dict",
653 "seq",
654 "obj",
655 "properties",
656 "value",
657 "type",
658 "group",
659 "ValueError",
660 "TypeError",
661 "IndexError",
662 "KeyError",
663 "Exception",
664 "isinstance",
665 "str",
666 "int",
667 "float",
668 "bool",
669 "True",
670 "False",
671 "None",
672 # Additional built-ins commonly used in rules
673 "abs",
674 "round",
675 "pow",
676 "divmod",
677 "complex",
678 "hash",
679 "hex",
680 "oct",
681 "bin",
682 "chr",
683 "ord",
684 "format",
685 "repr",
686 "bytes",
687 "bytearray",
688 "memoryview",
689 # Common math operations
690 "ceil",
691 "floor",
692 "trunc",
693 "exp",
694 "log",
695 "log10",
696 # Commonly used in rules
697 # Common types
698 "Sequence",
699 "AbstractObject",
700 "List",
701 "Dict",
702 "Set",
703 "Tuple",
704 "Optional",
705 "Union",
706 "Any",
707 "Callable",
708 "TypeVar",
709 "Generic",
710 }
712 # Add closure variables if provided
713 if closure_vars:
714 self.defined_names.update(closure_vars)
716 self.used_names = set()
717 self.imports = set()
719 def visit_Name(self, node):
720 if isinstance(node.ctx, ast.Store):
721 self.defined_names.add(node.id)
722 elif isinstance(node.ctx, ast.Load):
723 self.used_names.add(node.id)
724 self.generic_visit(node)
726 def visit_Import(self, node):
727 for name in node.names:
728 self.defined_names.add(name.name)
729 if name.asname:
730 self.defined_names.add(name.asname)
731 self.imports.add(name.name)
732 self.generic_visit(node)
734 def visit_ImportFrom(self, node):
735 for name in node.names:
736 if name.asname:
737 self.defined_names.add(name.asname)
738 else:
739 self.defined_names.add(name.name)
740 self.imports.add(name.name)
741 self.generic_visit(node)
743 def visit_FunctionDef(self, node):
744 # Add function parameters to defined names
745 for arg in node.args.args:
746 self.defined_names.add(arg.arg)
747 # Process function body
748 self.generic_visit(node)
750 # Create a visitor and visit the tree
751 visitor = UndefinedVariableVisitor()
752 visitor.visit(tree)
754 # Check for undefined variables
755 undefined = visitor.used_names - visitor.defined_names
756 if undefined:
757 # Try to provide more context about the undefined variable
758 undefined_var = next(iter(undefined))
759 # Check if it might be a module that needs to be imported
760 if undefined_var in (
761 "math",
762 "random",
763 "statistics",
764 "collections",
765 "itertools",
766 ):
767 raise AnalysisError(
768 f"Missing import for module: {undefined_var}. Add 'import {undefined_var}' to the rule."
769 )
770 # Check if it might be a common function from a module
771 elif undefined_var in ("sqrt", "sin", "cos", "tan", "log", "exp"):
772 raise AnalysisError(
773 f"Missing import for math function: {undefined_var}. Add 'import math' and use 'math.{undefined_var}'."
774 )
775 # Check if it might be a parameter from the factory function
776 elif undefined_var in [
777 "property_name",
778 "value",
779 "min_value",
780 "max_value",
781 "tolerance",
782 "pattern",
783 "condition",
784 "window",
785 "group_size",
786 "trend",
787 "groups",
788 "dependencies",
789 "rules",
790 "required_count",
791 "min_length",
792 "max_length",
793 "inner_rule",
794 "mode",
795 "min_ratio",
796 "max_ratio",
797 "filter_rule",
798 "valid_transitions",
799 "stat_func",
800 "scope",
801 "properties",
802 ]:
803 # This is likely a closure variable from a factory function
804 # We'll add it to the defined names and rerun the analysis
805 visitor.defined_names.add(undefined_var)
806 undefined = visitor.used_names - visitor.defined_names
807 if undefined:
808 # If there are still undefined variables, raise an error
809 raise AnalysisError(
810 f"Undefined variable in rule: {next(iter(undefined))}"
811 )
812 # General case
813 else:
814 raise AnalysisError(f"Undefined variable in rule: {undefined_var}")
816 def _check_undefined_variables(self, tree: ast.AST) -> None:
817 """
818 Check for undefined variables in an AST.
819 This is a helper method for testing. It delegates to _analyze_ast,
820 which performs the identical undefined-variable analysis.
821 Args:
822 tree: The AST to check
823 Raises:
824 AnalysisError: If undefined variables are found
825 """
826 self._analyze_ast(tree)
1014 def _analyze_undefined_variables(self, tree: ast.AST) -> None:
1015 """
1016 Analyze undefined variables in an AST.
1017 This is a helper method for testing. It delegates to _analyze_ast,
1018 which performs the identical undefined-variable analysis.
1019 Args:
1020 tree: The AST to analyze
1021 Raises:
1022 AnalysisError: If undefined variables are found
1023 """
1024 self._analyze_ast(tree)
1212 def _analyze_property_access(
1213 self, rule: Union[FormalRule, DSLRule]
1214 ) -> Dict[str, PropertyAccess]:
1215 """Analyze property access patterns in a rule for testing."""
1216 inner_func = self._extract_inner_function(rule.func)
1217 source = inspect.getsource(inner_func)
1218 source = textwrap.dedent(source)
1219 tree = ast.parse(source)
1220 return self._property_analyzer.analyze_ast(tree)
1222 def _profile_rule(self, rule: Union[FormalRule, DSLRule]) -> PerformanceProfile:
1223 """Profile a rule's performance for testing."""
1224 return self._performance_profiler.profile_rule(rule.func, self._sequences)
1226 def _analyze_coverage(self, rule: Union[FormalRule, DSLRule]) -> float:
1227 """Analyze the code coverage of a rule using sample sequences."""
1228 if not self._sequences:
1229 return 0.0
1231 successful = 0
1232 total = 0
1233 for seq in self._sequences:
1234 # Create test sequences of different lengths
1235 test_sequences = [
1236 [], # Empty sequence
1237 [seq[0]] if len(seq) > 0 else [], # Single element
1238 list(seq), # Original sequence
1239 ]
1241 for test_seq in test_sequences:
1242 try:
1243 rule(test_seq)
1244 successful += 1
1245 except Exception:
1246 # Expected failures for invalid sequences
1247 pass
1248 total += 1
1250 return successful / total if total > 0 else 0.0
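# -----------------------------------------------------------------------
# Coverage sketch (stdlib-only mirror of the logic above, for illustration):
# each sample sequence is probed as empty, single-element, and full, and
# coverage is the fraction of probes that evaluate without raising.
#
#     def coverage(rule, sequences):
#         successful = total = 0
#         for seq in sequences:
#             for probe in ([], seq[:1], list(seq)):
#                 try:
#                     rule(probe)
#                     successful += 1
#                 except Exception:
#                     pass
#                 total += 1
#         return successful / total if total else 0.0
#
#     coverage(lambda s: s[0] > 0, [[1, 2, 3]])   # ~0.67: the empty probe raises
# -----------------------------------------------------------------------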
1252 def _analyze_ast_patterns(self, tree: ast.AST) -> Dict[str, Any]:
1253 """Analyze AST patterns to detect complexity features."""
1254 result = {
1255 "total_loops": 0,
1256 "nested_loops": 0,
1257 "has_factorial": False,
1258 "has_exponential": False,
1259 "recursion_depth": 0,
1260 "max_loop_depth": 0,
1261 }
1263 # Track current loop depth
1264 current_loop_depth = 0
1266 # Visitor to analyze loop patterns
1267 class LoopVisitor(ast.NodeVisitor):
1268 def __init__(self):
1269 self.seen_functions = set()
1270 self.recursive_calls = set()
1271 self.factorial_pattern = False
1272 self.exponential_pattern = False
1274 def visit_For(self, node):
1275 nonlocal current_loop_depth, result
1276 current_loop_depth += 1
1277 result["total_loops"] += 1
1278 result["max_loop_depth"] = max(
1279 result["max_loop_depth"], current_loop_depth
1280 )
1281 if current_loop_depth > 1:
1282 result["nested_loops"] += 1
1283 self.generic_visit(node)
1284 current_loop_depth -= 1
1286 def visit_While(self, node):
1287 nonlocal current_loop_depth, result
1288 current_loop_depth += 1
1289 result["total_loops"] += 1
1290 result["max_loop_depth"] = max(
1291 result["max_loop_depth"], current_loop_depth
1292 )
1293 if current_loop_depth > 1:
1294 result["nested_loops"] += 1
1295 self.generic_visit(node)
1296 current_loop_depth -= 1
1298 def visit_FunctionDef(self, node):
1299 self.seen_functions.add(node.name)
1300 self.generic_visit(node)
1302 def visit_Call(self, node):
1303 # Check for recursive calls
1304 if isinstance(node.func, ast.Name):
1305 func_name = node.func.id
1306 if func_name in self.seen_functions:
1307 self.recursive_calls.add(func_name)
1308 # Check for factorial pattern (recursive call in multiplication)
1309 for parent in ast.walk(tree):
1310 if isinstance(parent, ast.BinOp) and isinstance(
1311 parent.op, ast.Mult
1312 ):
1313 if (
1314 isinstance(parent.left, ast.Name)
1315 and isinstance(parent.right, ast.Call)
1316 and isinstance(parent.right.func, ast.Name)
1317 and parent.right.func.id == func_name
1318 ):
1319 self.factorial_pattern = True
1320 elif (
1321 isinstance(parent.right, ast.Name)
1322 and isinstance(parent.left, ast.Call)
1323 and isinstance(parent.left.func, ast.Name)
1324 and parent.left.func.id == func_name
1325 ):
1326 self.factorial_pattern = True
1328 # Check for exponential pattern (multiple recursive calls)
1329 for parent in ast.walk(tree):
1330 if isinstance(parent, ast.BinOp) and isinstance(
1331 parent.op, ast.Add
1332 ):
1333 left_is_recursive = (
1334 isinstance(parent.left, ast.Call)
1335 and isinstance(parent.left.func, ast.Name)
1336 and parent.left.func.id == func_name
1337 )
1338 right_is_recursive = (
1339 isinstance(parent.right, ast.Call)
1340 and isinstance(parent.right.func, ast.Name)
1341 and parent.right.func.id == func_name
1342 )
1343 if left_is_recursive and right_is_recursive:
1344 self.exponential_pattern = True
1346 self.generic_visit(node)
1348 visitor = LoopVisitor()
1349 visitor.visit(tree)
1351 result["has_factorial"] = visitor.factorial_pattern
1352 result["has_exponential"] = visitor.exponential_pattern
1353 result["recursion_depth"] = len(visitor.recursive_calls)
1355 return result
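# -----------------------------------------------------------------------
# AST-pattern sketch (illustration): for a doubly nested loop the method
# above reports total_loops=2, nested_loops=1, max_loop_depth=2.
#
#     import ast
#     src = (
#         "def has_pair(seq):\n"
#         "    for a in seq:\n"
#         "        for b in seq:\n"
#         "            if a is not b and a == b:\n"
#         "                return True\n"
#         "    return False\n"
#     )
#     patterns = RuleAnalyzer()._analyze_ast_patterns(ast.parse(src))
#     # patterns["total_loops"] == 2 and patterns["nested_loops"] == 1
# -----------------------------------------------------------------------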
1357 def _calculate_cyclomatic_complexity(self, tree: ast.AST) -> int:
1358 """Calculate the cyclomatic complexity of a rule."""
1359 complexity = 1 # Start with 1 for the rule itself
1360 visited = set()
1362 def visit(node):
1363 nonlocal complexity
1364 if id(node) in visited:
1365 return
1366 visited.add(id(node))
1368 # Count control flow statements
1369 if isinstance(node, (ast.If, ast.For, ast.While)):
1370 complexity += 1
1371 # Count boolean operations (and, or)
1372 elif isinstance(node, ast.BoolOp):
1373 complexity += len(node.values) - 1
1374 # Count comparison operations with multiple comparators
1375 elif isinstance(node, ast.Compare):
1376 complexity += len(node.ops) - 1
1377 # Count list/set comprehensions and generator expressions
1378 elif isinstance(node, (ast.ListComp, ast.SetComp, ast.GeneratorExp)):
1379 # Add 1 for each generator (for clause)
1380 complexity += len(node.generators)
1381 # Add 1 for each if clause in the generators
1382 complexity += sum(len(gen.ifs) for gen in node.generators)
1383 # Count lambda functions
1384 elif isinstance(node, ast.Lambda):
1385 complexity += 1
1386 # Count try/except blocks
1387 elif isinstance(node, ast.Try):
1388 complexity += len(node.handlers) # Add 1 for each except clause
1389 # Count with blocks
1390 elif isinstance(node, ast.With):
1391 complexity += 1
1393 for child in ast.iter_child_nodes(node):
1394 visit(child)
1396 visit(tree)
1397 return complexity
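# -----------------------------------------------------------------------
# Cyclomatic-complexity sketch (illustration): starting from 1, the method
# above adds 1 per if/for/while, (n - 1) per boolean operation with n
# operands, and 1 per generator clause or filter in a comprehension.
#
#     import ast
#     src = (
#         "def valid(seq):\n"
#         "    if not seq or len(seq) > 100:\n"   # if -> +1, 'or' -> +1
#         "        return False\n"
#         "    return all(x is not None for x in seq)\n"   # generator -> +1
#     )
#     RuleAnalyzer()._calculate_cyclomatic_complexity(ast.parse(src))   # == 4
# -----------------------------------------------------------------------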
1399 def _extract_inner_function(self, func):
1400 """Extract the inner function from a rule function."""
1401 # If it's a lambda, return it directly
1402 if isinstance(func, types.LambdaType):
1403 return func
1405 # Get the source code
1406 source = inspect.getsource(func)
1407 tree = ast.parse(source)
1409 # Look for inner function definitions
1410 for node in ast.walk(tree):
1411 if isinstance(node, ast.FunctionDef):
1412 # Get the function object from the function's globals
1413 if node.name in func.__globals__:
1414 return func.__globals__[node.name]
1416 # If no inner function found, return the original
1417 return func
1419 def _calculate_size_time_correlation(
1420 self, sizes: List[int], times: List[float]
1421 ) -> Optional[float]:
1422 """Calculate correlation between sequence size and execution time."""
1423 if not sizes or not times or len(sizes) != len(times) or len(sizes) < 2:
1424 return None
1426 # Check if all times are zero
1427 if all(t == 0 for t in times):
1428 return None
1430 # Calculate Pearson correlation coefficient
1431 try:
1432 # Calculate means
1433 size_mean = statistics.mean(sizes)
1434 time_mean = statistics.mean(times)
1436 # Calculate numerator and denominator
1437 numerator = sum(
1438 (s - size_mean) * (t - time_mean) for s, t in zip(sizes, times)
1439 )
1440 denominator_size = sum((s - size_mean) ** 2 for s in sizes)
1441 denominator_time = sum((t - time_mean) ** 2 for t in times)
1443 if denominator_size == 0 or denominator_time == 0:
1444 return None
1446 correlation = numerator / (denominator_size**0.5 * denominator_time**0.5)
1447 return correlation
1448 except (ValueError, statistics.StatisticsError):
1449 return None
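# -----------------------------------------------------------------------
# Correlation sketch (illustration): the method above computes a Pearson
# coefficient between sequence sizes and timings, returning None for
# degenerate inputs (fewer than two points, all-zero times, zero variance).
#
#     sizes = [10, 20, 40, 80]
#     times = [0.011, 0.019, 0.042, 0.078]
#     r = RuleAnalyzer()._calculate_size_time_correlation(sizes, times)
#     # r is close to 1.0, i.e. evaluation time grows roughly linearly with
#     # input size; statistics.correlation(sizes, times) agrees on Python 3.10+.
# -----------------------------------------------------------------------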
1451 def compare_rules(
1452 self,
1453 rule1: Union[FormalRule, DSLRule],
1454 rule2: Union[FormalRule, DSLRule],
1455 test_sequences: Optional[List[Sequence]] = None,
1456 ) -> Dict[str, Any]:
1457 """Compare two rules and analyze their relationships."""
1458 if test_sequences is None:
1459 test_sequences = self._sequences
1461 if not test_sequences:
1462 raise ValueError("No test sequences available for comparison")
1464 # Validate sequences
1465 for seq in test_sequences:
1466 if not isinstance(seq, list):
1467 raise ValueError("All sequences must be lists")
1468 if not all(isinstance(obj, AbstractObject) for obj in seq):
1469 raise ValueError(
1470 "All elements in sequences must be AbstractObject instances"
1471 )
1473 # Create evaluation records for each rule
1474 rule1_results = []
1475 rule2_results = []
1476 differences = []
1478 for seq in test_sequences:
1479 try:
1480 result1 = rule1(seq)
1481 result2 = rule2(seq)
1483 rule1_results.append(result1)
1484 rule2_results.append(result2)
1486 if result1 != result2:
1487 differences.append(
1488 {
1489 "sequence": seq,
1490 "rule1_result": result1,
1491 "rule2_result": result2,
1492 }
1493 )
1494 except Exception:
1495 # Skip sequences that cause errors
1496 continue
1498 # Calculate acceptance rates
1499 rule1_acceptance = (
1500 sum(1 for r in rule1_results if r) / len(rule1_results)
1501 if rule1_results
1502 else 0
1503 )
1504 rule2_acceptance = (
1505 sum(1 for r in rule2_results if r) / len(rule2_results)
1506 if rule2_results
1507 else 0
1508 )
1510 # Determine relationship
1511 is_subset = all(
1512 not r1 or r2
1513 for r1, r2 in zip(rule1_results, rule2_results)
1514 if r1 is not None and r2 is not None
1515 )
1516 is_superset = all(
1517 not r2 or r1
1518 for r1, r2 in zip(rule1_results, rule2_results)
1519 if r1 is not None and r2 is not None
1520 )
1522 relationship = None
1523 stricter_rule = None
1525 if is_subset and is_superset:
1526 relationship = "equivalent"
1527 elif is_subset:
1528 relationship = "subset"
1529 stricter_rule = "rule1"
1530 elif is_superset:
1531 relationship = "superset"
1532 stricter_rule = "rule2"
1533 else:
1534 relationship = "incomparable"
1536 return {
1537 "relationship": relationship,
1538 "stricter_rule": stricter_rule,
1539 "rule1_acceptance_rate": rule1_acceptance,
1540 "rule2_acceptance_rate": rule2_acceptance,
1541 "differences": differences,
1542 }
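# -----------------------------------------------------------------------
# Comparison sketch (illustration): compare_rules() reports whether one
# rule accepts a subset or superset of the sequences accepted by the other.
# It assumes an analyzer already configured with sample sequences and the
# DSLRule constructor shown near the top of this file.
#
#     short = DSLRule(lambda seq: len(seq) <= 5)      # assumed ctor
#     shorter = DSLRule(lambda seq: len(seq) <= 3)
#     report = analyzer.compare_rules(shorter, short)
#     # report["relationship"] == "subset" and report["stricter_rule"] == "rule1"
#     # whenever the sample sequences include a length between 4 and 5.
# -----------------------------------------------------------------------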
1544 def find_minimal_failing_sequence(
1545 self, rule: Union[FormalRule, DSLRule], sequence: Sequence
1546 ) -> Optional[Sequence]:
1547 """Find a minimal subsequence that causes the rule to fail."""
1548 if not sequence:
1549 return None
1551 # Check if the full sequence passes the rule
1552 try:
1553 if rule(sequence):
1554 return None # Rule passes, no failing sequence
1555 except Exception:
1556 return None # Error in rule evaluation, can't find failing sequence
1558 # Binary search approach to find minimal failing subsequence
1559 def find_minimal(start: int, end: int) -> Optional[Sequence]:
1560 if start > end:
1561 return None
1563 # Check single element
1564 if start == end:
1565 subseq = [sequence[start]]
1566 try:
1567 if not rule(subseq):
1568 return subseq
1569 except Exception:
1570 pass
1571 return None
1573 # Check first half
1574 mid = (start + end) // 2
1575 first_half = sequence[start : mid + 1]
1576 try:
1577 if not rule(first_half):
1578 return find_minimal(start, mid)
1579 except Exception:
1580 pass
1582 # Check second half
1583 second_half = sequence[mid + 1 : end + 1]
1584 try:
1585 if not rule(second_half):
1586 return find_minimal(mid + 1, end)
1587 except Exception:
1588 pass
1590 # Check if we need both parts
1591 for i in range(start, mid + 1):
1592 for j in range(mid + 1, end + 1):
1593 subseq = [sequence[i], sequence[j]]
1594 try:
1595 if not rule(subseq):
1596 return subseq
1597 except Exception:
1598 pass
1600 return sequence[start : end + 1] # Entire section needed
1602 return find_minimal(0, len(sequence) - 1)
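# -----------------------------------------------------------------------
# Debugging sketch (illustration): find_minimal_failing_sequence() returns a
# small subsequence that still violates the rule, or None when the full
# sequence passes. The obj.properties mapping and the constructors shown are
# assumptions about seqrule.core / seqrule.dsl.
#
#     all_positive = DSLRule(
#         lambda seq: all(obj.properties["value"] > 0 for obj in seq)  # assumed API
#     )
#     bad_sequence = [AbstractObject(value=v) for v in (3, 1, -2, 5)]  # assumed ctor
#     minimal = analyzer.find_minimal_failing_sequence(all_positive, bad_sequence)
#     # Expected: a short subsequence containing just the object with value -2.
# -----------------------------------------------------------------------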