Coverage for src\llm_code_lens\analyzer\python.py: 24%

288 statements  

« prev     ^ index     » next       coverage.py v7.7.0, created at 2025-05-25 12:07 +0300

1import ast 

2from pathlib import Path 

3from typing import Dict, List, Optional, Set, Tuple, Union 

4from dataclasses import dataclass 

5from .base import BaseAnalyzer 

6 

@dataclass
class CodeLocation:
    """Position of a syntax node inside a source file.

    ``line`` is treated as a 1-based line number by the code that slices
    source text with it. The end fields are optional and stay ``None``
    when the extent of the node is not known.
    """
    line: int  # first line of the node (1-based)
    column: int  # column offset of the node's first character
    end_line: Optional[int] = None  # last line of the node, when known
    end_column: Optional[int] = None  # ending column offset, when known

14 

@dataclass
class ImportInfo:
    """Description of a single imported name."""
    name: str  # the imported name as written in the statement
    alias: Optional[str]  # name bound by ``as``, or None when there is none
    module: Optional[str]  # source module for ``from ... import``; falsy for plain ``import``
    is_relative: bool  # whether the import is a relative one
    location: CodeLocation  # where the statement appears in the file

23 

@dataclass
class FunctionArgument:
    """Description of one parameter in a function signature.

    The three flags default to ``False``; at most one of them is set by
    the extraction code for any given parameter.
    """
    name: str  # parameter name
    type_annotation: Optional[str]  # rendered annotation, or None when absent
    default_value: Optional[str]  # rendered default expression, or None when absent
    is_kwonly: bool = False  # keyword-only parameter
    is_vararg: bool = False  # the ``*args`` parameter
    is_kwarg: bool = False  # the ``**kwargs`` parameter

33 

@dataclass
class FunctionInfo:
    """Collected details about one function definition."""
    name: str  # function name
    args: List[FunctionArgument]  # parameters in signature order
    return_type: Optional[str]  # rendered return annotation, or None
    docstring: Optional[str]  # docstring text, or None
    decorators: List[str]  # rendered decorator expressions
    is_async: bool  # True for ``async def``
    location: CodeLocation  # where the definition sits in the file
    complexity: int  # complexity score computed by the analyzer
    loc: int  # size measure of the function as supplied by the caller

46 

@dataclass
class ClassInfo:
    """Collected details about one class definition."""
    name: str  # class name
    bases: List[str]  # rendered base-class expressions
    # One dict per method (name, docstring, decorators, ...); the analyzer
    # fills this with dictionaries, so the element type is Dict rather than str.
    methods: List[Dict]
    docstring: Optional[str]  # docstring text, or None
    decorators: List[str]  # rendered decorator expressions
    location: CodeLocation  # where the definition sits in the file
    complexity: int  # complexity score computed by the analyzer

57 

58class PythonAnalyzer(BaseAnalyzer): 

59 """Python-specific code analyzer using AST with enhanced features.""" 

60 

def analyze_file(self, file_path: Path) -> dict:
    """
    Analyze a Python file and return detailed analysis results.

    Args:
        file_path: Path to the Python file to analyze (read as UTF-8).

    Returns:
        dict: On success, a dictionary with the keys ``type``,
        ``full_content``, ``imports``, ``functions``, ``classes``,
        ``comments``, ``todos`` and ``metrics``.

        On failure no exception escapes; the dictionary instead holds
        ``type``, an ``errors`` list with one entry (``syntax_error`` with
        line/offset/text, or ``analysis_error`` with text), and an
        all-zero ``metrics`` mapping.
    """
    def empty_metrics() -> dict:
        # Fresh dict per call so results never share mutable state.
        return {
            'loc': 0,
            'classes': 0,
            'functions': 0,
            'imports': 0,
            'complexity': 0
        }

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        tree = ast.parse(content)

        metrics = empty_metrics()
        metrics['loc'] = len(content.splitlines())

        analysis = {
            'type': 'python',
            'full_content': content,
            'imports': [],
            'functions': [],
            'classes': [],
            'comments': [],
            'todos': [],
            'metrics': metrics
        }

        # Each step fills in its own section of ``analysis`` in place.
        self._process_imports(tree, analysis)
        self._process_functions(tree, analysis, content)
        self._process_classes(tree, analysis, content)
        self._process_comments(content, analysis)

        analysis['metrics']['complexity'] = self._calculate_module_complexity(tree)

        return analysis

    except SyntaxError as e:
        # The file was readable but is not valid Python.
        return {
            'type': 'python',
            'errors': [{
                'type': 'syntax_error',
                'line': e.lineno,
                'offset': e.offset,
                'text': str(e)
            }],
            'metrics': empty_metrics()
        }
    except Exception as e:
        # Deliberate catch-all boundary: one unreadable or odd file must
        # not abort the analysis of the remaining files.
        return {
            'type': 'python',
            'errors': [{
                'type': 'analysis_error',
                'text': str(e)
            }],
            'metrics': empty_metrics()
        }

146 

147 def _process_imports(self, tree: ast.AST, analysis: dict) -> None: 

148 """Process imports and handle each import statement individually.""" 

149 unique_imports = set() 

150 import_count = 0 

151 

152 for node in ast.walk(tree): 

153 if isinstance(node, ast.Import): 

154 for name in node.names: 

155 import_count += 1 

156 unique_imports.add(f"import {name.name}") 

157 elif isinstance(node, ast.ImportFrom): 

158 module = node.module or '' 

159 level = '.' * node.level 

160 

161 # Group imports from same module together 

162 for name in node.names: 

163 import_count += 1 

164 if name.asname: 

165 unique_imports.add(f"from {level}{module} import {name.name} as {name.asname}") 

166 else: 

167 unique_imports.add(f"from {level}{module} import {name.name}") 

168 

169 analysis['metrics']['imports'] = import_count 

170 analysis['imports'] = sorted(list(unique_imports)) 

171 

172 

173 

174 

175 

def _process_functions(self, tree: ast.AST, analysis: dict, content: str) -> None:
    """Record every function definition found anywhere in ``tree``.

    Both ``def`` and ``async def`` are handled, including methods and
    nested functions, because the whole tree is walked. For each one the
    function counter in ``analysis['metrics']`` is incremented and a
    summary dict is appended to ``analysis['functions']``.

    Args:
        tree: Parsed module.
        analysis: Result dictionary, updated in place.
        content: Full source text, used to slice out the function's code.
    """
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue

        analysis['metrics']['functions'] += 1

        end_line = getattr(node, 'end_lineno', None)
        # Lines spanned by the definition; falls back to the number of
        # top-level statements when the parser gave no end position.
        if end_line is not None:
            line_count = end_line - node.lineno + 1
        else:
            line_count = len(node.body)

        func_info = FunctionInfo(
            name=node.name,
            args=self._extract_function_args(node.args),
            return_type=self._format_annotation(node.returns) if node.returns else None,
            docstring=ast.get_docstring(node),
            decorators=[self._format_decorator(d) for d in node.decorator_list],
            is_async=isinstance(node, ast.AsyncFunctionDef),
            location=CodeLocation(
                line=node.lineno,
                column=node.col_offset,
                end_line=end_line,
                end_column=getattr(node, 'end_col_offset', None)
            ),
            complexity=self._calculate_function_complexity(node),
            loc=line_count
        )

        func_content = self._extract_source(content, func_info.location)

        analysis['functions'].append({
            'name': func_info.name,
            'args': [self._format_argument(arg) for arg in func_info.args],
            'return_type': func_info.return_type,
            'docstring': func_info.docstring,
            'decorators': func_info.decorators,
            'is_async': func_info.is_async,
            'content': func_content,
            'loc': func_info.loc,
            'line_number': func_info.location.line,
            'complexity': func_info.complexity
        })

216 

def _extract_function_args(self, args: ast.arguments) -> List[FunctionArgument]:
    """Convert an ``ast.arguments`` node into ``FunctionArgument`` objects.

    The result follows signature order: positional-only and regular
    positional parameters, ``*args``, keyword-only parameters, then
    ``**kwargs``. A leading ``self`` or ``cls`` parameter is left out.
    The variadic entries keep their ``*`` / ``**`` prefix in ``name``.

    Args:
        args: The ``args`` attribute of a function definition node.

    Returns:
        One ``FunctionArgument`` per reported parameter.
    """
    arguments = []

    # Positional-only parameters (Python 3.8+) come first and share the
    # single trailing ``defaults`` list with the regular ones.
    positional = list(getattr(args, 'posonlyargs', [])) + list(args.args)

    # ``defaults`` belongs to the LAST len(defaults) positional parameters;
    # pad on the left so every parameter pairs with its own default.
    padding = len(positional) - len(args.defaults)
    padded_defaults = [None] * padding + list(args.defaults)

    for index, (arg, default) in enumerate(zip(positional, padded_defaults)):
        # Only the first parameter can be the implicit receiver; a later
        # parameter that merely happens to be called self/cls is kept.
        if index == 0 and arg.arg in ('self', 'cls'):
            continue
        argument = self._create_argument(arg)
        if default is not None:
            argument.default_value = self._format_annotation(default)
        arguments.append(argument)

    # Handle *args
    if args.vararg:
        arguments.append(FunctionArgument(
            name=f"*{args.vararg.arg}",
            type_annotation=self._format_annotation(args.vararg.annotation),
            default_value=None,
            is_vararg=True
        ))

    # Keyword-only parameters: ``kw_defaults`` runs parallel to
    # ``kwonlyargs`` and holds None where no default was given.
    for arg, default in zip(args.kwonlyargs, args.kw_defaults):
        argument = self._create_argument(arg, is_kwonly=True)
        if default is not None:
            argument.default_value = self._format_annotation(default)
        arguments.append(argument)

    # Handle **kwargs
    if args.kwarg:
        arguments.append(FunctionArgument(
            name=f"**{args.kwarg.arg}",
            type_annotation=self._format_annotation(args.kwarg.annotation),
            default_value=None,
            is_kwarg=True
        ))

    return arguments

269 

def _create_argument(self, arg: ast.arg, is_kwonly: bool = False) -> FunctionArgument:
    """Build a ``FunctionArgument`` for one ``ast.arg`` node.

    The default value is left as ``None``; callers fill it in afterwards.

    Args:
        arg: The parameter node.
        is_kwonly: Mark the parameter as keyword-only.
    """
    return FunctionArgument(
        name=arg.arg,
        # _format_annotation already returns None for a missing annotation.
        type_annotation=self._format_annotation(arg.annotation),
        default_value=None,
        is_kwonly=is_kwonly
    )

278 

279 

def _process_classes(self, tree: ast.AST, analysis: dict, content: str) -> None:
    """Record every class definition found anywhere in ``tree``.

    Nested classes are included because the whole tree is walked. For
    each class the counter in ``analysis['metrics']`` is incremented and
    a summary dict is appended to ``analysis['classes']``.

    Args:
        tree: Parsed module.
        analysis: Result dictionary, updated in place.
        content: Full source text (not used by this step).
    """
    for node in ast.walk(tree):
        if not isinstance(node, ast.ClassDef):
            continue

        analysis['metrics']['classes'] += 1

        class_info = ClassInfo(
            name=node.name,
            bases=self._extract_base_classes(node),
            methods=self._extract_class_methods(node),
            docstring=ast.get_docstring(node),
            decorators=[self._format_decorator(d) for d in node.decorator_list],
            location=CodeLocation(
                line=node.lineno,
                column=node.col_offset,
                end_line=node.end_lineno,
                end_column=node.end_col_offset
            ),
            complexity=self._calculate_class_complexity(node)
        )

        analysis['classes'].append({
            'name': class_info.name,
            'bases': class_info.bases,
            'methods': class_info.methods,
            'docstring': class_info.docstring,
            'decorators': class_info.decorators,
            'line_number': class_info.location.line,
            'complexity': class_info.complexity
        })

312 

313 def _extract_class_methods(self, node: ast.ClassDef) -> List[Dict]: 

314 """Extract detailed method information from a class.""" 

315 methods = [] 

316 

317 for item in node.body: 

318 if isinstance(item, ast.FunctionDef): 

319 method_info = { 

320 'name': item.name, 

321 'docstring': ast.get_docstring(item), 

322 'decorators': [self._format_decorator(d) for d in item.decorator_list], 

323 'is_property': self._is_property(item), 

324 'is_classmethod': self._is_classmethod(item), 

325 'is_staticmethod': self._is_staticmethod(item), 

326 'line_number': item.lineno 

327 } 

328 methods.append(method_info) 

329 

330 return methods 

331 

332 def _process_comments(self, content: str, analysis: dict) -> None: 

333 """Extract and categorize comments and TODOs.""" 

334 lines = content.split('\n') 

335 

336 # Track multiline strings/comments 

337 in_multiline = False 

338 multiline_content = [] 

339 multiline_start = 0 

340 

341 for i, line in enumerate(lines, 1): 

342 stripped = line.strip() 

343 

344 # Handle multiline strings that might be docstrings 

345 if stripped.startswith('"""') or stripped.startswith("'''"): 

346 if not in_multiline and not (stripped.endswith('"""') or stripped.endswith("'''")): 

347 in_multiline = True 

348 multiline_start = i 

349 multiline_content = [stripped] 

350 continue 

351 elif in_multiline: 

352 in_multiline = False 

353 multiline_content.append(stripped) 

354 # Only process if it's a comment, not a docstring 

355 if not self._is_docstring(content, multiline_start): 

356 comment_text = '\n'.join(multiline_content) 

357 self._add_comment_or_todo(comment_text, multiline_start, analysis) 

358 continue 

359 

360 if in_multiline: 

361 multiline_content.append(stripped) 

362 continue 

363 

364 # Handle single line comments 

365 if stripped.startswith('#'): 

366 comment_text = stripped[1:].strip() 

367 self._add_comment_or_todo(comment_text, i, analysis) 

368 

369 def _calculate_function_complexity(self, node: ast.FunctionDef) -> int: 

370 """Calculate cyclomatic complexity for a function.""" 

371 complexity = 1 # Base complexity 

372 

373 for child in ast.walk(node): 

374 # Control flow increases complexity 

375 if isinstance(child, (ast.If, ast.While, ast.For, ast.AsyncFor)): 

376 complexity += 1 

377 elif isinstance(child, ast.ExceptHandler): 

378 complexity += 1 

379 elif isinstance(child, ast.BoolOp): 

380 if isinstance(child.op, ast.And): 

381 complexity += len(child.values) - 1 

382 elif isinstance(child, ast.Return): 

383 if isinstance(child.value, ast.IfExp): 

384 complexity += 1 

385 

386 return complexity 

387 

388 def _calculate_class_complexity(self, node: ast.ClassDef) -> int: 

389 """Calculate complexity for a class.""" 

390 complexity = len(node.bases) # Inheritance adds complexity 

391 

392 # Add complexity of methods 

393 for child in node.body: 

394 if isinstance(child, ast.FunctionDef): 

395 complexity += self._calculate_function_complexity(child) 

396 

397 return complexity 

398 

399 def _calculate_module_complexity(self, tree: ast.AST) -> int: 

400 """Calculate overall module complexity.""" 

401 complexity = 0 

402 

403 # Add complexity of all functions and classes 

404 for node in ast.walk(tree): 

405 if isinstance(node, ast.FunctionDef): 

406 complexity += self._calculate_function_complexity(node) 

407 elif isinstance(node, ast.ClassDef): 

408 complexity += self._calculate_class_complexity(node) 

409 

410 return complexity 

411 

412 def _extract_base_classes(self, node: ast.ClassDef) -> List[str]: 

413 """Extract and format base class information.""" 

414 bases = [] 

415 for base in node.bases: 

416 if isinstance(base, ast.Name): 

417 bases.append(base.id) 

418 elif isinstance(base, ast.Attribute): 

419 bases.append(f"{self._format_dotted_name(base)}") 

420 elif isinstance(base, ast.Call): 

421 # Handle metaclasses and parameterized bases 

422 if isinstance(base.func, ast.Name): 

423 bases.append(f"{base.func.id}(...)") 

424 elif isinstance(base.func, ast.Attribute): 

425 bases.append(f"{self._format_dotted_name(base.func)}(...)") 

426 return bases 

427 

428 def _format_dotted_name(self, node: ast.Attribute) -> str: 

429 """Format attribute access into dotted name.""" 

430 parts = [] 

431 current = node 

432 while isinstance(current, ast.Attribute): 

433 parts.append(current.attr) 

434 current = current.value 

435 if isinstance(current, ast.Name): 

436 parts.append(current.id) 

437 return '.'.join(reversed(parts)) 

438 

439 def _format_annotation(self, node: Optional[ast.AST]) -> Optional[str]: 

440 """Format type annotations into string representation.""" 

441 if node is None: 

442 return None 

443 

444 if isinstance(node, ast.Name): 

445 return node.id 

446 elif isinstance(node, ast.Attribute): 

447 return self._format_dotted_name(node) 

448 elif isinstance(node, ast.Subscript): 

449 value = self._format_annotation(node.value) 

450 if isinstance(node.slice, ast.Index): 

451 # Handle Python 3.8 style annotations 

452 slice_value = self._format_annotation(node.slice.value) 

453 else: 

454 # Handle Python 3.9+ style annotations 

455 slice_value = self._format_annotation(node.slice) 

456 return f"{value}[{slice_value}]" 

457 elif isinstance(node, ast.Tuple): 

458 elements = [self._format_annotation(elt) for elt in node.elts] 

459 return f"Tuple[{', '.join(elements)}]" 

460 elif isinstance(node, ast.List): 

461 elements = [self._format_annotation(elt) for elt in node.elts] 

462 return f"List[{', '.join(elements)}]" 

463 elif isinstance(node, ast.Constant): 

464 return repr(node.value) 

465 elif isinstance(node, ast.BinOp): 

466 if isinstance(node.op, ast.BitOr): 

467 left = self._format_annotation(node.left) 

468 right = self._format_annotation(node.right) 

469 return f"Union[{left}, {right}]" 

470 elif isinstance(node, ast.Index): 

471 # Handle Python 3.8 style index nodes directly 

472 return self._format_annotation(node.value) 

473 return str(node) 

474 

475 

476 

def _format_import(self, import_info: ImportInfo) -> str:
    """Render an ``ImportInfo`` as an import statement.

    Produces ``from <module> import <name>`` when a module is set and
    ``import <name>`` otherwise, followed by `` as <alias>`` when an
    alias is present.

    NOTE(review): ``import_info.is_relative`` is not consulted here, so
    any leading dots must already be part of ``module`` — confirm with
    the code that builds ``ImportInfo``.
    """
    if import_info.module:
        statement = f"from {import_info.module} import {import_info.name}"
    else:
        statement = f"import {import_info.name}"

    if import_info.alias:
        return f"{statement} as {import_info.alias}"
    return statement

488 

def _format_argument(self, arg: FunctionArgument) -> str:
    """Render one parameter as it would appear in a signature.

    The result has the form ``name[: annotation][ = default]``. Variadic
    parameters carry exactly one ``*`` / ``**`` prefix, whether or not
    ``arg.name`` already includes it.
    """
    if arg.is_vararg:
        # The name may already be stored as "*args"; strip any stars first
        # so the prefix is never doubled.
        text = '*' + arg.name.lstrip('*')
    elif arg.is_kwarg:
        text = '**' + arg.name.lstrip('*')
    else:
        text = arg.name

    if arg.type_annotation:
        text += f": {arg.type_annotation}"

    if arg.default_value:
        text += f" = {arg.default_value}"

    return text

510 

511 def _format_decorator(self, node: ast.expr) -> str: 

512 """Format decorator into string representation.""" 

513 if isinstance(node, ast.Name): 

514 return node.id 

515 elif isinstance(node, ast.Call): 

516 if isinstance(node.func, ast.Name): 

517 return f"{node.func.id}(...)" 

518 elif isinstance(node.func, ast.Attribute): 

519 return f"{self._format_dotted_name(node.func)}(...)" 

520 elif isinstance(node, ast.Attribute): 

521 return self._format_dotted_name(node) 

522 return "unknown_decorator" 

523 

def _extract_source(self, content: str, location: CodeLocation) -> str:
    """Return the source lines covered by ``location``.

    Lines ``location.line`` through ``location.end_line`` (1-based,
    inclusive) are joined with newlines. Without an end line only the
    starting line is returned. A location beyond the end of ``content``
    yields an empty string instead of raising.
    """
    lines = content.splitlines()
    first = location.line - 1  # 1-based line number -> 0-based index
    last = location.end_line if location.end_line else location.line
    # Slicing never raises, unlike indexing a single out-of-range line.
    return '\n'.join(lines[first:last])

530 

531 def _is_docstring(self, content: str, line_number: int) -> bool: 

532 """Check if a multiline string is a docstring.""" 

533 lines = content.splitlines() 

534 

535 # Look for the previous non-empty line 

536 current_line = line_number - 2 # -2 because line_number is 1-based 

537 while current_line >= 0 and not lines[current_line].strip(): 

538 current_line -= 1 

539 

540 if current_line < 0: 

541 return True # Module-level docstring 

542 

543 prev_line = lines[current_line].strip() 

544 return prev_line.endswith(':') or prev_line.startswith('@') 

545 

546 def _add_comment_or_todo(self, text: str, line: int, analysis: dict) -> None: 

547 """Add a comment as either a regular comment or TODO based on content.""" 

548 text = text.strip() 

549 if any(marker in text.upper() for marker in ['TODO', 'FIXME', 'XXX']): 

550 analysis['todos'].append({ 

551 'text': text, 

552 'line': line 

553 }) 

554 else: 

555 analysis['comments'].append({ 

556 'text': text, 

557 'line': line 

558 }) 

559 

560 def _is_property(self, node: ast.FunctionDef) -> bool: 

561 """Check if a method is a property.""" 

562 return any( 

563 self._format_decorator(d) in {'property', 'cached_property'} 

564 for d in node.decorator_list 

565 ) 

566 

567 def _is_classmethod(self, node: ast.FunctionDef) -> bool: 

568 """Check if a method is a classmethod.""" 

569 return any( 

570 self._format_decorator(d) == 'classmethod' 

571 for d in node.decorator_list 

572 ) 

573 

574 def _is_staticmethod(self, node: ast.FunctionDef) -> bool: 

575 """Check if a method is a staticmethod.""" 

576 return any( 

577 self._format_decorator(d) == 'staticmethod' 

578 for d in node.decorator_list 

579 )