Coverage for src\llm_code_lens\analyzer\base.py: 18%

191 statements  


from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Optional
from dataclasses import dataclass

@dataclass
class AnalysisResult:
    """Container for analysis results."""
    summary: dict
    insights: List[str]
    files: Dict[str, dict]

    def to_text(self) -> str:
        """Convert analysis to LLM-friendly text format."""
        from ..formatters.llm import format_analysis
        return format_analysis(self)

    def to_json(self) -> str:
        """Convert analysis to JSON format."""
        import json
        return json.dumps({
            'summary': self.summary,
            'insights': self.insights,
            'files': self.files
        }, indent=2)
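# Illustrative usage sketch (hypothetical values):
#
#     result = AnalysisResult(
#         summary={'project_stats': {'total_files': 2, 'lines_of_code': 140}},
#         insights=['Project contains 2 analyzable files'],
#         files={'src/app.py': {'type': 'python', 'metrics': {'loc': 120}}},
#     )
#     print(result.to_json())  # pretty-printed JSON with summary/insights/files keys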

class BaseAnalyzer(ABC):
    """Base class for all code analyzers."""

    @abstractmethod
    def analyze_file(self, file_path: Path) -> dict:
        """
        Analyze a file and return standardized analysis results.

        Args:
            file_path: Path to the file to analyze.

        Returns:
            dict with the following structure:
            {
                'type': str,              # Analyzer type (e.g., 'python', 'sql')
                'metrics': {
                    'loc': int,           # Lines of code
                    'classes': int,       # Number of classes
                    'functions': int,     # Number of functions
                    'imports': int,       # Number of imports
                    'complexity': int     # Complexity metric
                },
                'imports': List[str],     # List of import statements
                'functions': List[dict],  # List of function details
                'classes': List[dict],    # List of class details
                'comments': List[dict],   # List of comments
                'todos': List[dict],      # List of TODOs
                'errors': List[dict],     # Optional analysis errors
                'full_content': str,      # Optional full file content
            }

        Note:
            - All fields are optional except 'type' and 'metrics'
            - Language-specific analyzers may add additional fields
        """
        pass
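# Illustrative sketch of the smallest conforming analyzer (hypothetical class,
# not shipped with the package); only 'type' and 'metrics' are filled in, every
# other documented field is optional:
#
#     class PlainTextAnalyzer(BaseAnalyzer):
#         def analyze_file(self, file_path: Path) -> dict:
#             lines = file_path.read_text(errors='replace').splitlines()
#             return {
#                 'type': 'text',
#                 'metrics': {'loc': len(lines), 'classes': 0, 'functions': 0,
#                             'imports': 0, 'complexity': 0},
#             }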

class ProjectAnalyzer:
    """Main project analyzer that coordinates language-specific analyzers."""

    def __init__(self):
        self.analyzers = self._initialize_analyzers()

    def _initialize_analyzers(self) -> Dict[str, BaseAnalyzer]:
        """Initialize language-specific analyzers."""
        from .python import PythonAnalyzer
        from .javascript import JavaScriptAnalyzer
        from . import SQLServerAnalyzer  # Use the proxy instead of direct import

        analyzers = {
            '.py': PythonAnalyzer(),
            '.js': JavaScriptAnalyzer(),
            '.jsx': JavaScriptAnalyzer(),
            '.ts': JavaScriptAnalyzer(),
            '.tsx': JavaScriptAnalyzer(),
        }

        # Try to add SQL analyzer, but don't crash if it fails
        try:
            sql_analyzer = SQLServerAnalyzer()
            analyzers['.sql'] = sql_analyzer
        except Exception as e:
            import warnings
            warnings.warn(f"SQL Server analyzer could not be initialized: {e}")

        return analyzers
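    # Illustrative sketch: the extension-to-analyzer mapping can also be extended
    # by callers after construction (MarkdownAnalyzer is a hypothetical
    # BaseAnalyzer subclass):
    #
    #     analyzer = ProjectAnalyzer()
    #     analyzer.analyzers['.md'] = MarkdownAnalyzer()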

    def analyze(self, path: Path) -> AnalysisResult:
        """Analyze entire project directory with tree structure."""
        # Initialize analysis structure
        analysis = {
            'summary': {
                'project_stats': {
                    'total_files': 0,
                    'by_type': {},
                    'lines_of_code': 0,
                    'avg_file_size': 0
                },
                'code_metrics': {
                    'functions': {'count': 0, 'with_docs': 0, 'complex': 0},
                    'classes': {'count': 0, 'with_docs': 0},
                    'imports': {'count': 0, 'unique': set()}
                },
                'maintenance': {
                    'todos': [],
                    'comments_ratio': 0,
                    'doc_coverage': 0
                },
                'structure': {
                    'directories': set(),
                    'entry_points': [],
                    'core_files': []
                }
            },
            'insights': [],
            'files': {}
        }

        # Add configuration analysis (kept under 'summary' so that the final
        # AnalysisResult(**analysis) call only receives its declared fields)
        config_analysis = self._analyze_project_configuration(path)
        analysis['summary']['configuration'] = config_analysis

        # Collect analyzable files
        files = self._collect_files(path)
        analysis['summary']['project_stats']['total_files'] = len(files)

        # Process each file
        for file_path in files:
            if analyzer := self.analyzers.get(file_path.suffix.lower()):
                try:
                    file_analysis = analyzer.analyze_file(file_path)
                    str_path = str(file_path)

                    # Ensure file_analysis has required fields
                    if not isinstance(file_analysis, dict):
                        print(f"Error analyzing {file_path}: Invalid analysis result")
                        continue

                    if 'type' not in file_analysis:
                        file_analysis['type'] = file_path.suffix.lower().lstrip('.')

                    # Skip files with errors unless they have partial results
                    if 'errors' in file_analysis and not file_analysis.get('metrics', {}).get('loc', 0):
                        print(f"Error analyzing {file_path}: {file_analysis['errors']}")
                        continue

                    # Update file types count
                    ext = file_path.suffix
                    analysis['summary']['project_stats']['by_type'][ext] = \
                        analysis['summary']['project_stats']['by_type'].get(ext, 0) + 1

                    # Store file analysis
                    analysis['files'][str_path] = file_analysis

                    # Update metrics
                    self._update_metrics(analysis, file_analysis, str_path)

                except Exception as e:
                    print(f"Error analyzing {file_path}: {e}")
                    continue

        # Add tree structure generation
        from ..utils.tree import ProjectTree

        # Get excluded paths from analysis
        excluded_paths = set()
        if hasattr(self, '_excluded_paths'):
            excluded_paths = self._excluded_paths

        # Generate tree structure
        tree_generator = ProjectTree(ignore_patterns=[], max_depth=4)
        project_tree = tree_generator.generate_tree(path, excluded_paths)
        summary_tree = tree_generator.generate_summary_tree(path, excluded_paths)

        # Add to analysis structure
        analysis['summary']['structure']['project_tree'] = project_tree
        analysis['summary']['structure']['tree_summary'] = summary_tree

        # Calculate final metrics
        self._calculate_final_metrics(analysis)

        # Generate insights
        if insights_gen := analysis.get('summary', {}).get('insights_generator'):
            analysis['insights'] = insights_gen(analysis)
        else:
            analysis['insights'] = self._generate_default_insights(analysis)

        return AnalysisResult(**analysis)
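    # Illustrative end-to-end usage sketch (from calling code, filenames hypothetical):
    #
    #     from pathlib import Path
    #     result = ProjectAnalyzer().analyze(Path('.'))
    #     print(result.to_text())                            # LLM-friendly report
    #     Path('analysis.json').write_text(result.to_json())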

    def _analyze_package_json(self, path: Path):
        from .config import analyze_package_json
        return analyze_package_json(path / 'package.json')

    def _analyze_tsconfig(self, path: Path):
        from .config import analyze_tsconfig
        return analyze_tsconfig(path / 'tsconfig.json')

    def _analyze_next_config(self, path: Path):
        config_file = path / 'next.config.js'
        if config_file.exists():
            return {'exists': True, 'type': 'next.js config'}
        return None

    def _analyze_tailwind_config(self, path: Path):
        config_file = path / 'tailwind.config.js'
        if config_file.exists():
            return {'exists': True, 'type': 'tailwind config'}
        return None

    def _analyze_pyproject_toml(self, path: Path):
        config_file = path / 'pyproject.toml'
        if config_file.exists():
            try:
                import tomli
                with open(config_file, 'rb') as f:
                    data = tomli.load(f)
                return {'name': data.get('project', {}).get('name'), 'type': 'python project'}
            except Exception:
                return {'error': 'Failed to parse pyproject.toml'}
        return None

    def _analyze_requirements(self, path: Path):
        req_file = path / 'requirements.txt'
        if req_file.exists():
            try:
                with open(req_file, 'r') as f:
                    lines = [line.strip() for line in f if line.strip() and not line.startswith('#')]
                return {'dependencies': len(lines), 'type': 'python requirements'}
            except Exception:
                return {'error': 'Failed to parse requirements.txt'}
        return None

    def _analyze_env_example(self, path: Path):
        env_file = path / '.env.example'
        if env_file.exists():
            try:
                with open(env_file, 'r') as f:
                    lines = [line for line in f if '=' in line and not line.startswith('#')]
                return {'env_vars': len(lines), 'type': 'environment template'}
            except Exception:
                return {'error': 'Failed to parse .env.example'}
        return None

    def _extract_readme_summary(self, path: Path):
        from .config import extract_readme_summary
        return extract_readme_summary(path)

    def _analyze_project_configuration(self, path: Path) -> dict:
        """Analyze project configuration files for additional context."""
        config_files = {
            'package.json': self._analyze_package_json(path),
            'tsconfig.json': self._analyze_tsconfig(path),
            'next.config.js': self._analyze_next_config(path),
            'tailwind.config.js': self._analyze_tailwind_config(path),
            'pyproject.toml': self._analyze_pyproject_toml(path),
            'requirements.txt': self._analyze_requirements(path),
            '.env.example': self._analyze_env_example(path),
            'README.md': self._extract_readme_summary(path)
        }

        # Filter out None values (files that don't exist)
        return {k: v for k, v in config_files.items() if v is not None}
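    # Illustrative result shape (hypothetical values): a project shipping only
    # pyproject.toml and README.md would yield something like
    #     {'pyproject.toml': {'name': 'my-tool', 'type': 'python project'},
    #      'README.md': {...}}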

    def _collect_files(self, path: Path) -> List[Path]:
        """Collect all analyzable files from directory."""
        files = []

        for file_path in path.rglob('*'):
            if (file_path.is_file() and
                file_path.suffix.lower() in self.analyzers):
                files.append(file_path)

        return files

    def _update_metrics(self, analysis: dict, file_analysis: dict, file_path: str) -> None:
        """Update project metrics with file analysis results."""
        metrics = file_analysis.get('metrics', {})

        # Update basic metrics
        analysis['summary']['project_stats']['lines_of_code'] += metrics.get('loc', 0)

        # Update function metrics
        functions = file_analysis.get('functions', [])
        analysis['summary']['code_metrics']['functions']['count'] += len(functions)
        analysis['summary']['code_metrics']['functions']['with_docs'] += \
            sum(1 for f in functions if f.get('docstring'))
        analysis['summary']['code_metrics']['functions']['complex'] += \
            sum(1 for f in functions if f.get('complexity', 0) > 5)

        # Update class metrics
        classes = file_analysis.get('classes', [])
        analysis['summary']['code_metrics']['classes']['count'] += len(classes)
        analysis['summary']['code_metrics']['classes']['with_docs'] += \
            sum(1 for c in classes if c.get('docstring'))

        # Update imports
        imports = file_analysis.get('imports', [])
        analysis['summary']['code_metrics']['imports']['count'] += len(imports)
        analysis['summary']['code_metrics']['imports']['unique'].update(imports)

        # Update structure info
        dir_path = str(Path(file_path).parent)
        analysis['summary']['structure']['directories'].add(dir_path)

        # Update entry points
        if self._is_entry_point(file_path, file_analysis):
            analysis['summary']['structure']['entry_points'].append(file_path)

        # Update core files
        if self._is_core_file(file_analysis):
            analysis['summary']['structure']['core_files'].append(file_path)

        # Update maintenance info
        for todo in file_analysis.get('todos', []):
            analysis['summary']['maintenance']['todos'].append({
                'file': file_path,
                'line': todo.get('line', 0),
                'text': todo.get('text', ''),
                'priority': self._estimate_todo_priority(todo.get('text', ''))
            })

    def _calculate_final_metrics(self, analysis: dict) -> None:
        """Calculate final metrics and handle serialization."""
        total_files = analysis['summary']['project_stats']['total_files']
        if total_files > 0:
            # Calculate average file size
            analysis['summary']['project_stats']['avg_file_size'] = \
                analysis['summary']['project_stats']['lines_of_code'] / total_files

        # Calculate documentation coverage
        total_elements = (
            analysis['summary']['code_metrics']['functions']['count'] +
            analysis['summary']['code_metrics']['classes']['count']
        )
        if total_elements > 0:
            documented = (
                analysis['summary']['code_metrics']['functions']['with_docs'] +
                analysis['summary']['code_metrics']['classes']['with_docs']
            )
            analysis['summary']['maintenance']['doc_coverage'] = \
                (documented / total_elements) * 100

        # Convert sets to lists for serialization
        analysis['summary']['code_metrics']['imports']['unique'] = \
            list(analysis['summary']['code_metrics']['imports']['unique'])
        analysis['summary']['structure']['directories'] = \
            list(analysis['summary']['structure']['directories'])
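    # Worked example (hypothetical numbers): with 12 of 40 functions and 3 of 10
    # classes documented, doc_coverage = (12 + 3) / (40 + 10) * 100 = 30.0.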

    def _is_entry_point(self, file_path: str, analysis: dict) -> bool:
        """Identify if a file is a potential entry point."""
        from ..utils import is_potential_entry_point
        return is_potential_entry_point(file_path, analysis)

    def _is_core_file(self, analysis: dict) -> bool:
        """Identify if a file is likely a core component."""
        from ..utils import is_core_file
        return is_core_file(analysis)

    def _estimate_todo_priority(self, text: str) -> str:
        """Estimate TODO priority based on content."""
        from ..utils import estimate_todo_priority
        return estimate_todo_priority(text)

    def _generate_default_insights(self, analysis: dict) -> List[str]:
        """Generate default insights from analysis results."""
        insights = []

        # Basic project stats
        total_files = analysis['summary']['project_stats']['total_files']
        insights.append(f"Project contains {total_files} analyzable files")

        # Documentation insights
        doc_coverage = analysis['summary']['maintenance']['doc_coverage']
        if doc_coverage < 50:
            insights.append(f"Low documentation coverage ({doc_coverage:.1f}%)")
        elif doc_coverage > 80:
            insights.append(f"Good documentation coverage ({doc_coverage:.1f}%)")

        # Complexity insights
        complex_funcs = analysis['summary']['code_metrics']['functions']['complex']
        if complex_funcs > 0:
            insights.append(f"Found {complex_funcs} complex functions that might need attention")

        # TODO insights
        todos = analysis['summary']['maintenance']['todos']
        if todos:
            high_priority = sum(1 for todo in todos if todo['priority'] == 'high')
            if high_priority > 0:
                insights.append(f"Found {high_priority} high-priority TODOs")

        return insights

397 return insights