Coverage for src/seqrule/analysis/performance.py: 24%
98 statements
coverage.py v7.6.12, created at 2025-02-27 10:56 -0600
1"""
2Performance profiling module.
4This module provides functionality for profiling the performance characteristics
5of sequence rules, including execution time, memory usage, and scaling behavior.
6"""
8import time
9from dataclasses import dataclass, field
10from typing import Any, Dict, List, Optional
12try:
13 import memory_profiler
15 HAS_MEMORY_PROFILER = True
16except ImportError:
17 HAS_MEMORY_PROFILER = False
19try:
20 import scipy.stats
22 HAS_SCIPY = True
23except ImportError:
24 HAS_SCIPY = False
@dataclass
class PerformanceProfile:
    """Performance profiling results for a rule.

    If ``size_time_correlation`` is not supplied (left as ``None``) it is
    derived in ``__post_init__`` from ``sequence_sizes`` and
    ``timing_distribution``. An explicitly supplied value, including
    ``0.0``, is kept as given.
    """

    # Mean evaluation time; rendered with an "s" suffix by __str__.
    avg_evaluation_time: float = 0.0
    # Highest memory figure observed; rendered with an "MB" suffix by __str__.
    peak_memory_usage: float = 0.0
    # Number of evaluations that contributed to this profile.
    call_count: int = 0
    # One entry per profiled sequence: its size.
    sequence_sizes: List[int] = field(default_factory=list)
    # Maps a sequence size to the time recorded for that size.
    timing_distribution: Dict[Any, float] = field(default_factory=dict)
    # Pearson correlation between sizes and times, or None when undefined.
    size_time_correlation: Optional[float] = None

    def __post_init__(self):
        """Derive the correlation when the caller did not provide one."""
        # Compare against None rather than using truthiness: 0.0 is a valid,
        # caller-supplied correlation and must not be recomputed.
        if self.size_time_correlation is None:
            self.size_time_correlation = self._calculate_correlation()

    def _calculate_correlation(self) -> Optional[float]:
        """Calculate correlation between sequence sizes and execution times.

        Returns:
            The Pearson correlation coefficient as a ``float``, or ``None``
            when it is undefined: fewer than two samples, a size with no
            entry in ``timing_distribution``, or zero variance in either the
            sizes or the times.
        """
        sizes = list(self.sequence_sizes)
        if len(sizes) < 2:
            return None

        try:
            times = [self.timing_distribution[size] for size in sizes]
        except (KeyError, TypeError):
            # A recorded size has no usable timing entry; nothing to correlate.
            return None

        # Pearson's r is undefined when either series is constant. Checking
        # up front keeps the scipy path (which would yield NaN) consistent
        # with the manual path (which yields None).
        if all(s == sizes[0] for s in sizes) or all(t == times[0] for t in times):
            return None

        if HAS_SCIPY:
            try:
                correlation, _ = scipy.stats.pearsonr(sizes, times)
                return float(correlation)
            except Exception:
                # Deliberate best-effort: whatever goes wrong inside scipy
                # (broken install, unsupported input), fall through to the
                # dependency-free calculation below instead of failing
                # object construction.
                pass

        return self._pearson(sizes, times)

    @staticmethod
    def _pearson(sizes: List[Any], times: List[Any]) -> Optional[float]:
        """Compute Pearson's r for two equal-length series without scipy."""
        try:
            size_mean = sum(sizes) / len(sizes)
            time_mean = sum(times) / len(times)

            covariance = sum(
                (s - size_mean) * (t - time_mean) for s, t in zip(sizes, times)
            )
            size_var = sum((s - size_mean) ** 2 for s in sizes)
            time_var = sum((t - time_mean) ** 2 for t in times)

            if size_var == 0 or time_var == 0:
                return None  # Zero variance: correlation is undefined
            return float(covariance / (size_var**0.5 * time_var**0.5))
        except (ArithmeticError, TypeError, ValueError):
            # Non-numeric data, overflow, or a denominator that underflowed
            # to zero: report "no correlation available" rather than raising.
            return None

    def __str__(self) -> str:
        """Return a human-readable performance summary."""
        # Use 3 decimal places for small values, 2 for larger values.
        # Zero is special-cased so it renders as "0.00s" rather than "0.000s".
        if self.avg_evaluation_time == 0:
            time_str = "0.00s"
        else:
            time_format = ".3f" if self.avg_evaluation_time < 0.01 else ".2f"
            time_str = f"{self.avg_evaluation_time:{time_format}}s"

        # Only a missing correlation is shown as N/A; 0.0 is a real result.
        if self.size_time_correlation is None:
            correlation_str = "N/A"
        else:
            correlation_str = str(self.size_time_correlation)

        return (
            f"Average time: {time_str}\n"
            f"Peak memory: {self.peak_memory_usage:.2f}MB\n"
            f"Calls: {self.call_count}\n"
            f"Size-Time correlation: {correlation_str}"
        )
class PerformanceProfiler:
    """Times rule evaluations and, optionally, samples their memory usage."""

    def __init__(self, memory_profiling: bool = False, samples: int = 1):
        """Configure the profiler.

        Args:
            memory_profiling: Request memory measurement. The request is only
                honoured when ``HAS_MEMORY_PROFILER`` is truthy.
            samples: Number of samples to collect for each sequence. The
                value is stored on the instance; ``profile_rule`` as written
                evaluates each sequence once for timing regardless of it.
        """
        self.samples = samples
        self.memory_profiling = memory_profiling and HAS_MEMORY_PROFILER

    def profile_rule(
        self, rule_func: callable, sequences: List[List[Any]]
    ) -> PerformanceProfile:
        """Run ``rule_func`` over every sequence and summarise the cost.

        Args:
            rule_func: The rule to evaluate; called with one sequence at a time.
            sequences: The inputs to profile.

        Returns:
            A ``PerformanceProfile``. It is a default (empty) profile when
            ``sequences`` is empty or ``rule_func`` is not callable. A
            sequence whose evaluation raises is reported via ``print`` and
            left out of the timing data.
        """
        if not sequences:
            return PerformanceProfile()

        # A non-callable rule is reported once, not once per sequence.
        if not callable(rule_func):
            print(f"Error profiling sequence: '{rule_func}' object is not callable")
            return PerformanceProfile()

        elapsed_total = 0.0
        memory_peak = 0.0
        time_by_size = {}
        observed_sizes = []

        for candidate in sequences:
            try:
                size = len(candidate)

                started = time.perf_counter()
                rule_func(candidate)
                duration = time.perf_counter() - started

                # Timings are keyed by size, so a later sequence of the same
                # size replaces the earlier entry in the mapping.
                elapsed_total += duration
                time_by_size[size] = duration
                observed_sizes.append(size)

                if not self.memory_profiling:
                    continue

                # Bind the current sequence as a default argument so the
                # helper does not depend on the loop variable.
                def _invoke(seq=candidate):
                    rule_func(seq)

                readings = memory_profiler.memory_usage(
                    (_invoke, (), {}), interval=0.1
                )
                if readings:
                    highest = max(readings)
                    if highest > memory_peak:
                        memory_peak = highest
            except Exception as exc:
                # Report the failure and carry on with the next sequence.
                print(f"Error profiling sequence: {exc}")

        # Every successfully timed sequence contributed exactly one size.
        evaluations = len(observed_sizes)
        if evaluations:
            mean_time = elapsed_total / evaluations
        else:
            mean_time = 0.0

        return PerformanceProfile(
            avg_evaluation_time=mean_time,
            peak_memory_usage=memory_peak,
            call_count=evaluations,
            sequence_sizes=observed_sizes,
            timing_distribution=time_by_size,
        )