Coverage for src/seqrule/analysis/complexity.py: 11%

223 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-02-27 10:56 -0600

1""" 

2Complexity analysis module. 

3 

4This module provides functionality for analyzing the time and space complexity 

5of sequence rules by examining their AST patterns. 

6""" 

7 

8import ast 

9from dataclasses import dataclass, field 

10from typing import Any, Dict, List 

11 

12from .base import ComplexityClass 

13 

14 

@dataclass
class RuleComplexity:
    """Complexity analysis results for a rule.

    Attributes:
        time_complexity: Estimated time complexity class.
        space_complexity: Estimated space complexity class.
        description: Human-readable summary. When left empty it is generated
            from ``ast_features`` during ``__post_init__``.
        bottlenecks: Short notes naming the expensive parts of the rule.
        ast_features: Raw feature dictionary the analysis was derived from.
    """

    time_complexity: ComplexityClass
    space_complexity: ComplexityClass
    description: str = ""
    bottlenecks: List[str] = field(default_factory=list)
    ast_features: Dict[str, Any] = field(default_factory=dict)

    def __str__(self) -> str:
        """Return a human-readable, three-line summary of the complexity."""
        return (
            f"Time: {self.time_complexity}, Space: {self.space_complexity}\n"
            f"Description: {self.description}\n"
            f"Bottlenecks: {', '.join(self.bottlenecks)}"
        )

    def __post_init__(self) -> None:
        """Fill in the description from ``ast_features`` when none was given."""
        # Build the generated text once; it is needed both as the fallback
        # value and for the comparison below.
        generated = self._generate_description()
        if not self.description:
            self.description = generated
        # Only text identical to the generated description is lower-cased, so
        # the case of a user-provided description is left untouched.
        if self.description == generated:
            self.description = self.description.lower()

    def _generate_description(self) -> str:
        """Generate a description based on AST features.

        Returns:
            The matching sentence fragments joined with ``". "`` and closed
            with a final period, or an empty string when no feature is set
            (rather than a lone ``"."``).
        """
        features = self.ast_features
        parts = []
        if features.get("total_loops", 0) > 0:
            parts.append(f"contains {features['total_loops']} loops")
        if features.get("comprehensions", 0) > 0:
            parts.append(f"uses {features['comprehensions']} comprehensions")
        if features.get("builds_result_list", False):
            parts.append("creates temporary collections")
        if features.get("binary_search", False):
            parts.append("uses binary search")
        if features.get("has_factorial", False):
            parts.append("uses factorial recursion")
        if features.get("has_exponential", False):
            parts.append("uses exponential recursion")
        if not parts:
            return ""
        return ". ".join(parts) + "."

57 

58 

class ComplexityAnalyzer:
    """Analyzes AST patterns to determine complexity."""

    def __init__(self, max_calculations=1000, max_recursions=100):
        """Initialize the ComplexityAnalyzer with limits.

        Args:
            max_calculations: Maximum number of calculation operations to perform during analysis
            max_recursions: Maximum recursion depth to consider during analysis
        """
        self.max_calculations = max_calculations
        self.max_recursions = max_recursions
        self.operation_count = 0
        self.recursion_depth = 0

    def analyze(self, sequence):
        """Analyze a sequence to determine its complexity.

        Args:
            sequence: A list of AbstractObject instances to analyze. Each
                object's ``properties`` mapping is read for a ``"value"``
                entry (missing entries count as 0).

        Returns:
            RuleComplexity: The complexity analysis results
        """
        # Reset counters so each analysis starts from a clean slate
        self.operation_count = 0
        self.recursion_depth = 0

        # Extract values for pattern detection
        values = [obj.properties.get("value", 0) for obj in sequence]
        features = self._detect_sequence_patterns(values)

        time_complexity = self._determine_time_complexity(features)
        space_complexity = self._determine_space_complexity(features)
        description = self._generate_complexity_description(features)

        # Identify bottlenecks
        bottlenecks = []
        if features.get("builds_result_list", False):
            bottlenecks.append("Memory usage from temporary collections")
        if features.get("has_exponential", False) or features.get(
            "has_factorial", False
        ):
            bottlenecks.append("Exponential growth in computation time")
        if features.get("fibonacci_sequence", False):
            bottlenecks.append(
                "Exponential growth in computation time for Fibonacci sequence"
            )

        return RuleComplexity(
            time_complexity=time_complexity,
            space_complexity=space_complexity,
            description=description,
            bottlenecks=bottlenecks,
            ast_features=features,
        )

    def get_complexity_score(self, sequence):
        """Get a normalized complexity score for a sequence.

        The score is 70% the weight of the time complexity class and 30% the
        fraction of the ``max_calculations`` budget used by the analysis.

        Args:
            sequence: A list of AbstractObject instances to analyze

        Returns:
            float: A normalized complexity score between 0 and 100
        """
        complexity = self.analyze(sequence)

        # Base score on time complexity class
        complexity_weights = {
            ComplexityClass.CONSTANT: 10,
            ComplexityClass.LINEAR: 30,
            ComplexityClass.LINEARITHMIC: 50,
            ComplexityClass.QUADRATIC: 70,
            ComplexityClass.CUBIC: 85,
            ComplexityClass.EXPONENTIAL: 95,
            ComplexityClass.FACTORIAL: 100,
        }
        # Classes missing from the table fall back to a mid-range weight
        base_score = complexity_weights.get(complexity.time_complexity, 50)

        # Adjust score based on operation count
        if self.max_calculations > 0:
            operation_factor = min(self.operation_count / self.max_calculations, 1.0)
        else:
            # A non-positive budget cannot be divided by; treat it as fully
            # used as soon as any operation was counted.
            operation_factor = 1.0 if self.operation_count else 0.0

        # Final score is weighted combination
        score = base_score * 0.7 + (operation_factor * 100) * 0.3

        return min(score, 100)  # Cap at 100

    def _detect_sequence_patterns(self, values):
        """Detect patterns in a sequence of values.

        Checks for arithmetic progression, geometric progression and the
        Fibonacci recurrence, adding to ``self.operation_count`` as it goes.

        Args:
            values: A list of values to analyze

        Returns:
            dict: Features detected in the sequence. All features keep their
            defaults when fewer than three values are given or any value is
            not an ``int``/``float``.
        """
        features = {
            "total_loops": 0,
            "nested_loops": 0,
            "max_loop_depth": 0,
            "comprehensions": 0,
            "generator_expressions": 0,
            "sorting_operation": False,
            "binary_search": False,
            "builds_result_list": False,
            "has_exponential": False,
            "has_factorial": False,
            "arithmetic_progression": False,
            "geometric_progression": False,
            "fibonacci_sequence": False,
        }

        # Need at least 3 elements to detect patterns
        if len(values) < 3:
            return features

        # Skip pattern detection for non-numeric values
        if not all(isinstance(v, (int, float)) for v in values):
            return features

        tolerance = 0.0001

        # Check for arithmetic progression (constant difference).
        # OverflowError is caught as well: mixing a very large int with a
        # float raises it, and it should not abort the analysis.
        try:
            diffs = [later - earlier for earlier, later in zip(values, values[1:])]
            self.operation_count += len(diffs) * 2  # Count subtractions and comparisons

            if all(abs(d - diffs[0]) < tolerance for d in diffs):
                features["arithmetic_progression"] = True
                features["total_loops"] = 1  # Simulating a single loop
        except (TypeError, ValueError, OverflowError):
            pass

        # Check for geometric progression (constant ratio)
        try:
            if all(v != 0 for v in values[:-1]):  # Avoid division by zero
                ratios = [later / earlier for earlier, later in zip(values, values[1:])]
                self.operation_count += len(ratios) * 2  # Count divisions and comparisons

                if all(abs(r - ratios[0]) < tolerance for r in ratios):
                    features["geometric_progression"] = True
                    features["has_exponential"] = True
        except (TypeError, ValueError, ZeroDivisionError, OverflowError):
            pass

        # Check for Fibonacci sequence: every value is the sum of the two before it
        try:
            is_fibonacci = True
            for i in range(2, len(values)):
                self.operation_count += 3  # Addition and two comparisons
                if abs(values[i] - (values[i - 1] + values[i - 2])) > tolerance:
                    is_fibonacci = False
                    break

            if is_fibonacci:
                features["fibonacci_sequence"] = True
                # Fibonacci is reported as exponential complexity
                features["has_exponential"] = True
        except (TypeError, ValueError, OverflowError):
            pass

        # Simulate building result list for analysis
        features["builds_result_list"] = True

        return features

    def analyze_ast(self, tree: ast.AST) -> RuleComplexity:
        """Analyze an AST to determine its complexity.

        Args:
            tree: Root node of the AST to inspect.

        Returns:
            RuleComplexity: The complexity analysis results
        """
        # Reset counters, matching what analyze() does
        self.operation_count = 0
        self.recursion_depth = 0

        features = self._collect_ast_features(tree)
        description = self._generate_complexity_description(features)

        bottlenecks = []
        if features.get("builds_result_list", False):
            bottlenecks.append("Memory usage from temporary collections")

        # Determine complexity class
        time_complexity = self._determine_time_complexity(features)
        space_complexity = self._determine_space_complexity(features)

        return RuleComplexity(
            time_complexity=time_complexity,
            space_complexity=space_complexity,
            description=description,
            bottlenecks=bottlenecks,
            ast_features=features,
        )

    def _collect_ast_features(self, tree: ast.AST) -> Dict[str, Any]:
        """Collect features from the AST.

        Walks the tree once, counting loops and comprehensions and flagging
        sorting calls, collection construction, binary-search-like ``while``
        loops and calls named ``factorial`` / ``fibonacci``.

        Args:
            tree: Root node of the AST to inspect.

        Returns:
            The feature dictionary consumed by the ``_determine_*`` and
            description helpers.
        """
        features: Dict[str, Any] = {
            "total_loops": 0,
            "nested_loops": 0,
            "max_loop_depth": 0,
            "comprehensions": 0,
            "generator_expressions": 0,
            "sorting_operation": False,
            "binary_search": False,
            "builds_result_list": False,
            "has_exponential": False,
            "has_factorial": False,
            "loop_depths": set(),  # Track loop depths for better nesting detection
            "loop_ranges": [],  # Track loop ranges for dependency analysis
            "result_lists": [],  # Track result list assignments
        }

        # Variable names that hint at a binary search inside a while loop
        binary_search_vars = {
            "left",
            "right",
            "l",
            "r",
            "start",
            "end",
            "mid",
            "middle",
        }
        # Method names that grow a collection in place
        collection_mutators = {"append", "extend", "add", "update"}

        def visit(node: ast.AST, loop_depth: int = 0) -> None:
            if isinstance(node, (ast.For, ast.While)):
                features["total_loops"] += 1
                features["loop_depths"].add(loop_depth)

                # Track loop ranges for dependency analysis
                if (
                    isinstance(node, ast.For)
                    and isinstance(node.iter, ast.Call)
                    and isinstance(node.iter.func, ast.Name)
                    and node.iter.func.id == "range"
                ):
                    features["loop_ranges"].append(node.iter.args)

                if loop_depth > 0:
                    features["nested_loops"] += 1
                # NOTE(review): the original indentation was lost; the depth
                # is recorded for every loop here, so a lone top-level loop
                # reports 1 - confirm against the upstream file.
                features["max_loop_depth"] = max(
                    features["max_loop_depth"], loop_depth + 1
                )

                # Check for binary search pattern
                if isinstance(node, ast.While):
                    assigns = [n for n in ast.walk(node) if isinstance(n, ast.Assign)]
                    names = {
                        t.id
                        for a in assigns
                        for t in ast.walk(a)
                        if isinstance(t, ast.Name)
                    }
                    if names & binary_search_vars:
                        # Look for a mid calculation: any assignment whose
                        # value is an add / subtract / floor-divide expression
                        features["binary_search"] = features["binary_search"] or any(
                            isinstance(assign.value, ast.BinOp)
                            and isinstance(
                                assign.value.op, (ast.Add, ast.Sub, ast.FloorDiv)
                            )
                            for assign in assigns
                        )

            elif isinstance(node, (ast.ListComp, ast.SetComp, ast.DictComp)):
                features["comprehensions"] += 1
                features["builds_result_list"] = True
                # Each generator clause of a comprehension counts as a loop
                loop_count = len(getattr(node, "generators", []))
                features["total_loops"] += loop_count
                if loop_count > 1:
                    features["nested_loops"] += loop_count - 1

            elif isinstance(node, ast.GeneratorExp):
                features["generator_expressions"] += 1
                # Count nested loops in generator expressions
                loop_count = len(getattr(node, "generators", []))
                features["total_loops"] += loop_count
                if loop_count > 1:
                    features["nested_loops"] += loop_count - 1

            elif isinstance(node, ast.Call):
                func = node.func
                if isinstance(func, ast.Name):
                    if func.id in {"sorted", "sort"}:
                        features["sorting_operation"] = True
                    elif func.id in {"set", "list", "dict", "tuple"}:
                        features["builds_result_list"] = True
                    elif func.id == "factorial":
                        features["has_factorial"] = True
                    elif func.id == "fibonacci":
                        features["has_exponential"] = True
                elif isinstance(func, ast.Attribute):
                    # Method calls such as ``xs.sort()`` or ``out.append(x)``
                    # are attribute calls, so they never match the bare-name
                    # checks above. Calls nested inside an assignment are
                    # reached here too, because children are always visited.
                    if func.attr == "sort":
                        features["sorting_operation"] = True
                    elif func.attr in collection_mutators:
                        features["builds_result_list"] = True

            elif isinstance(node, ast.Assign):
                # Track result list assignments (list / set / dict literals)
                if isinstance(node.value, (ast.List, ast.Set, ast.Dict)):
                    features["builds_result_list"] = True
                    features["result_lists"].append(node.targets[0])

            # Only statement loops deepen the nesting level for their children
            child_depth = (
                loop_depth + 1 if isinstance(node, (ast.For, ast.While)) else loop_depth
            )
            for child in ast.iter_child_nodes(node):
                visit(child, child_depth)

        visit(tree)

        # Analyze loop dependencies between consecutive range() loops: if a
        # name in the earlier loop's first range argument also appears in the
        # next loop's range arguments, record at least two nested loops.
        loop_ranges = features["loop_ranges"]
        for outer_args, inner_args in zip(loop_ranges, loop_ranges[1:]):
            if not outer_args:
                # ``range()`` without arguments has nothing to compare
                continue
            outer_vars = {
                n.id for n in ast.walk(outer_args[0]) if isinstance(n, ast.Name)
            }
            inner_deps = {
                n.id
                for a in inner_args
                for n in ast.walk(a)
                if isinstance(n, ast.Name)
            }
            if outer_vars & inner_deps:
                features["nested_loops"] = max(features["nested_loops"], 2)

        return features

    def _generate_complexity_description(self, features: Dict[str, Any]) -> str:
        """Generate a human-readable description of the complexity analysis.

        Args:
            features: Feature dictionary from ``_collect_ast_features`` or
                ``_detect_sequence_patterns``. Missing keys count as unset.

        Returns:
            The matching sentence fragments joined with ``". "`` and closed
            with a final period, or an empty string when nothing applies
            (rather than a lone ``"."``).
        """
        parts = []

        total_loops = features.get("total_loops", 0)
        nested_loops = features.get("nested_loops", 0)
        if total_loops > 0:
            if nested_loops > 0:
                parts.append(
                    f"contains {total_loops} loops with {nested_loops} nested loops"
                )
            else:
                parts.append(f"contains {total_loops} loops")

        comprehensions = features.get("comprehensions", 0)
        if comprehensions > 0:
            parts.append(f"uses {comprehensions} comprehensions")

        # Remaining fragments are simple flag -> text pairs, in output order
        flag_fragments = (
            ("builds_result_list", "creates temporary collections"),
            ("arithmetic_progression", "follows arithmetic progression"),
            ("geometric_progression", "follows geometric progression"),
            ("fibonacci_sequence", "follows fibonacci sequence"),
            ("has_factorial", "uses factorial recursion"),
            ("has_exponential", "uses exponential recursion"),
            ("binary_search", "uses binary search"),
            ("sorting_operation", "performs sorting"),
        )
        parts.extend(text for key, text in flag_fragments if features.get(key, False))

        if not parts:
            return ""
        return ". ".join(parts) + "."

    def _determine_time_complexity(self, features: Dict[str, Any]) -> ComplexityClass:
        """Determine time complexity based on AST features.

        The checks run from most to least expensive class and the first match
        wins.

        Args:
            features: Feature dictionary describing the analyzed code/sequence.

        Returns:
            The selected ``ComplexityClass`` member.
        """
        if features.get("has_factorial", False):
            return ComplexityClass.FACTORIAL

        # Fibonacci sequences are reported as exponential complexity
        if features.get("has_exponential", False) or features.get(
            "fibonacci_sequence", False
        ):
            return ComplexityClass.EXPONENTIAL

        # Quadratic only for true nesting (loops seen at more than one depth),
        # not just sequential loops. When this does not hold, evaluation must
        # continue with the cheaper classes instead of ending at CONSTANT.
        truly_nested = len(features.get("loop_depths", set())) > 1
        if features.get("nested_loops", 0) > 0 and truly_nested:
            return ComplexityClass.QUADRATIC

        # Sorting and binary search are both reported as LINEARITHMIC
        if features.get("sorting_operation", False) or features.get(
            "binary_search", False
        ):
            return ComplexityClass.LINEARITHMIC

        # Single loops, comprehensions or generator expressions
        if features.get("total_loops", 0) > 0:
            return ComplexityClass.LINEAR

        return ComplexityClass.CONSTANT

    def _determine_space_complexity(self, features: Dict[str, Any]) -> ComplexityClass:
        """Determine space complexity based on AST features.

        Args:
            features: Feature dictionary describing the analyzed code/sequence.

        Returns:
            ``ComplexityClass.LINEAR`` when collections are built, otherwise
            ``ComplexityClass.CONSTANT``.
        """
        # If we're building collections, space complexity is at least linear
        if features.get("builds_result_list", False):
            return ComplexityClass.LINEAR

        # Only list/set/dict comprehensions use linear space;
        # generator expressions use constant space
        if features.get("total_loops", 0) > 0 and features.get("comprehensions", 0) > 0:
            return ComplexityClass.LINEAR

        return ComplexityClass.CONSTANT