Coverage for src/seqrule/analysis/performance.py: 24%

98 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-02-27 10:56 -0600

1""" 

2Performance profiling module. 

3 

4This module provides functionality for profiling the performance characteristics 

5of sequence rules, including execution time, memory usage, and scaling behavior. 

6""" 

7 

8import time 

9from dataclasses import dataclass, field 

10from typing import Any, Dict, List, Optional 

11 

12try: 

13 import memory_profiler 

14 

15 HAS_MEMORY_PROFILER = True 

16except ImportError: 

17 HAS_MEMORY_PROFILER = False 

18 

19try: 

20 import scipy.stats 

21 

22 HAS_SCIPY = True 

23except ImportError: 

24 HAS_SCIPY = False 

25 

26 

@dataclass
class PerformanceProfile:
    """Performance profiling results for a rule.

    Attributes:
        avg_evaluation_time: Mean evaluation time; rendered with an ``s``
            suffix by ``__str__``.
        peak_memory_usage: Highest memory reading; rendered with an ``MB``
            suffix by ``__str__``.
        call_count: Number of evaluations the averages are based on.
        sequence_sizes: Size of every profiled sequence, in profiling order.
        timing_distribution: Mapping from sequence size to the evaluation
            time recorded for that size.
        size_time_correlation: Pearson correlation between sequence size and
            evaluation time. When left as ``None`` it is derived from
            ``sequence_sizes`` and ``timing_distribution`` on construction,
            and stays ``None`` if it cannot be computed.
    """

    avg_evaluation_time: float = 0.0
    peak_memory_usage: float = 0.0
    call_count: int = 0
    sequence_sizes: List[int] = field(default_factory=list)
    timing_distribution: Dict[Any, float] = field(default_factory=dict)
    size_time_correlation: Optional[float] = None

    def __post_init__(self):
        """Derive the correlation unless the caller supplied one."""
        # Compare against None explicitly: 0.0 is a legitimate correlation
        # and must not be mistaken for "not provided".
        if self.size_time_correlation is None:
            self.size_time_correlation = self._calculate_correlation()

    def _calculate_correlation(self) -> Optional[float]:
        """Calculate correlation between sequence sizes and execution times.

        Returns:
            The Pearson correlation coefficient, or ``None`` when fewer than
            two sizes were recorded, a size has no timing entry, either
            series is constant (zero variance), or the data is otherwise
            unusable.
        """
        sizes = list(self.sequence_sizes)
        if len(sizes) < 2:
            return None

        try:
            times = [self.timing_distribution[size] for size in sizes]

            # Pearson's r is undefined when either series has zero variance.
            # Rejecting that here keeps the scipy path and the manual path
            # consistent (scipy would otherwise yield NaN).
            if all(t == times[0] for t in times):
                return None
            if all(s == sizes[0] for s in sizes):
                return None

            try:
                if HAS_SCIPY:
                    correlation, _ = scipy.stats.pearsonr(sizes, times)
                    correlation = float(correlation)
                    if correlation == correlation:  # False only for NaN
                        return correlation
            except Exception:
                # Deliberate best-effort: any scipy failure falls through
                # to the manual calculation below.
                pass

            return self._manual_pearson(sizes, times)
        except Exception:
            # This runs from __post_init__, so malformed profiling data
            # (missing timing keys, non-numeric values, ...) must not make
            # the profile impossible to construct.
            return None

    @staticmethod
    def _manual_pearson(sizes: List[Any], times: List[Any]) -> Optional[float]:
        """Compute Pearson's r for two equal-length series without scipy."""
        count = len(sizes)
        size_mean = sum(sizes) / count
        time_mean = sum(times) / count

        covariance = sum(
            (s - size_mean) * (t - time_mean) for s, t in zip(sizes, times)
        )
        size_var = sum((s - size_mean) ** 2 for s in sizes)
        time_var = sum((t - time_mean) ** 2 for t in times)

        if size_var == 0 or time_var == 0:
            return None  # zero variance: correlation is undefined
        return float(covariance / (size_var**0.5 * time_var**0.5))

    def __str__(self) -> str:
        """Return a human-readable performance summary."""
        # Zero is special-cased to "0.00s"; other values use 3 decimal
        # places below 0.01 and 2 decimal places otherwise.
        if self.avg_evaluation_time == 0:
            time_str = "0.00s"
        else:
            time_format = ".3f" if self.avg_evaluation_time < 0.01 else ".2f"
            time_str = f"{self.avg_evaluation_time:{time_format}}s"

        # Only a missing correlation is shown as N/A; 0.0 is a real result.
        if self.size_time_correlation is None:
            correlation_str = "N/A"
        else:
            correlation_str = str(self.size_time_correlation)

        return (
            f"Average time: {time_str}\n"
            f"Peak memory: {self.peak_memory_usage:.2f}MB\n"
            f"Calls: {self.call_count}\n"
            f"Size-Time correlation: {correlation_str}"
        )

117 

118 

class PerformanceProfiler:
    """Measures how long (and optionally how much memory) a rule needs."""

    def __init__(self, memory_profiling: bool = False, samples: int = 1):
        """Configure the profiler.

        Args:
            memory_profiling: Request memory profiling. It is only active
                when the optional ``memory_profiler`` import succeeded.
            samples: Number of samples to collect for each sequence.
                NOTE(review): this value is stored but nothing in this
                class reads it; every sequence is timed once.
        """
        self.samples = samples
        self.memory_profiling = memory_profiling and HAS_MEMORY_PROFILER

    def profile_rule(
        self, rule_func: callable, sequences: List[List[Any]]
    ) -> PerformanceProfile:
        """Run ``rule_func`` on each sequence and summarise the measurements.

        Args:
            rule_func: Callable invoked with one sequence at a time.
            sequences: Sequences to evaluate the rule against.

        Returns:
            A ``PerformanceProfile`` built from the successful evaluations.
            An empty profile is returned when ``sequences`` is empty or
            ``rule_func`` is not callable. A sequence whose evaluation
            raises is reported via ``print`` and skipped.
        """
        if not sequences:
            return PerformanceProfile()

        if not callable(rule_func):
            print(f"Error profiling sequence: '{rule_func}' object is not callable")
            return PerformanceProfile()

        elapsed_by_size = {}
        recorded_sizes = []
        cumulative_time = 0.0
        highest_memory = 0.0

        for current in sequences:
            try:
                size = len(current)

                began = time.perf_counter()
                rule_func(current)
                duration = time.perf_counter() - began

                cumulative_time += duration
                # Keyed by size: a later sequence of the same length
                # replaces the timing stored for an earlier one.
                elapsed_by_size[size] = duration
                recorded_sizes.append(size)

                if self.memory_profiling:
                    reading = self._sample_peak_memory(rule_func, current)
                    if reading is not None:
                        highest_memory = max(highest_memory, reading)
            except Exception as e:
                # Report the failure and move on to the next sequence.
                print(f"Error profiling sequence: {e}")

        # One size is recorded per successfully timed call.
        evaluations = len(recorded_sizes)
        mean_time = cumulative_time / evaluations if evaluations else 0.0

        return PerformanceProfile(
            avg_evaluation_time=mean_time,
            peak_memory_usage=highest_memory,
            call_count=evaluations,
            sequence_sizes=recorded_sizes,
            timing_distribution=elapsed_by_size,
        )

    def _sample_peak_memory(self, rule_func, sequence) -> Optional[float]:
        """Re-run the rule under ``memory_profiler`` and return its peak reading.

        Returns ``None`` when no readings were collected.
        """

        def invoke_rule(seq=sequence):
            rule_func(seq)

        readings = memory_profiler.memory_usage((invoke_rule, (), {}), interval=0.1)
        if readings:
            return max(readings)
        return None