Coverage for src\llm_code_lens\analyzer\base.py: 18%
191 statements
coverage.py v7.7.0, created at 2025-05-25 12:07 +0300
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Optional
from dataclasses import dataclass


@dataclass
class AnalysisResult:
    """Container for analysis results."""
    summary: dict
    insights: List[str]
    files: Dict[str, dict]

    def to_text(self) -> str:
        """Convert analysis to LLM-friendly text format."""
        from ..formatters.llm import format_analysis
        return format_analysis(self)

    def to_json(self) -> str:
        """Convert analysis to JSON format."""
        import json
        return json.dumps({
            'summary': self.summary,
            'insights': self.insights,
            'files': self.files
        }, indent=2)

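# Illustrative usage of AnalysisResult (hypothetical values, kept as a comment
# so it adds no behavior; real instances are produced by ProjectAnalyzer.analyze
# below):
#
#     result = AnalysisResult(
#         summary={'project_stats': {'total_files': 1}},
#         insights=['Project contains 1 analyzable files'],
#         files={'app.py': {'type': 'python', 'metrics': {'loc': 10}}},
#     )
#     result.to_json()   # pretty-printed JSON with summary, insights and files
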
class BaseAnalyzer(ABC):
    """Base class for all code analyzers."""

    @abstractmethod
    def analyze_file(self, file_path: Path) -> dict:
        """
        Analyze a file and return standardized analysis results.

        Args:
            file_path: Path to the file to analyze.

        Returns:
            dict with the following structure:
            {
                'type': str,              # Analyzer type (e.g., 'python', 'sql')
                'metrics': {
                    'loc': int,           # Lines of code
                    'classes': int,       # Number of classes
                    'functions': int,     # Number of functions
                    'imports': int,       # Number of imports
                    'complexity': int     # Complexity metric
                },
                'imports': List[str],     # List of import statements
                'functions': List[dict],  # List of function details
                'classes': List[dict],    # List of class details
                'comments': List[dict],   # List of comments
                'todos': List[dict],      # List of TODOs
                'errors': List[dict],     # Optional analysis errors
                'full_content': str,      # Optional full file content
            }

        Note:
            - All fields are optional except 'type' and 'metrics'.
            - Language-specific analyzers may add additional fields.
        """
        pass

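# Illustrative sketch (not part of the original module): a concrete analyzer
# only needs to implement analyze_file and return the dict described above.
# The class name and its heuristics are hypothetical examples.
class _ExamplePlainTextAnalyzer(BaseAnalyzer):
    """Minimal example analyzer for plain-text files."""

    def analyze_file(self, file_path: Path) -> dict:
        content = file_path.read_text(encoding='utf-8', errors='replace')
        lines = content.splitlines()
        return {
            'type': 'text',
            'metrics': {'loc': len(lines), 'classes': 0, 'functions': 0,
                        'imports': 0, 'complexity': 0},
            'imports': [],
            'functions': [],
            'classes': [],
            'comments': [],
            # Naive TODO detection, purely for illustration
            'todos': [{'line': i + 1, 'text': line.strip()}
                      for i, line in enumerate(lines) if 'TODO' in line],
        }
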
class ProjectAnalyzer:
    """Main project analyzer that coordinates language-specific analyzers."""

    def __init__(self):
        self.analyzers = self._initialize_analyzers()

    def _initialize_analyzers(self) -> Dict[str, BaseAnalyzer]:
        """Initialize language-specific analyzers."""
        from .python import PythonAnalyzer
        from .javascript import JavaScriptAnalyzer
        from . import SQLServerAnalyzer  # Use the proxy instead of direct import

        analyzers = {
            '.py': PythonAnalyzer(),
            '.js': JavaScriptAnalyzer(),
            '.jsx': JavaScriptAnalyzer(),
            '.ts': JavaScriptAnalyzer(),
            '.tsx': JavaScriptAnalyzer(),
        }

        # Try to add SQL analyzer, but don't crash if it fails
        try:
            sql_analyzer = SQLServerAnalyzer()
            analyzers['.sql'] = sql_analyzer
        except Exception as e:
            import warnings
            warnings.warn(f"SQL Server analyzer could not be initialized: {e}")

        return analyzers

    def analyze(self, path: Path) -> AnalysisResult:
        """Analyze entire project directory with tree structure."""
        # Initialize analysis structure
        analysis = {
            'summary': {
                'project_stats': {
                    'total_files': 0,
                    'by_type': {},
                    'lines_of_code': 0,
                    'avg_file_size': 0
                },
                'code_metrics': {
                    'functions': {'count': 0, 'with_docs': 0, 'complex': 0},
                    'classes': {'count': 0, 'with_docs': 0},
                    'imports': {'count': 0, 'unique': set()}
                },
                'maintenance': {
                    'todos': [],
                    'comments_ratio': 0,
                    'doc_coverage': 0
                },
                'structure': {
                    'directories': set(),
                    'entry_points': [],
                    'core_files': []
                }
            },
            'insights': [],
            'files': {}
        }

        # Add configuration analysis. Stored under the summary so the trailing
        # AnalysisResult(**analysis) call only receives the fields the
        # dataclass defines (summary, insights, files).
        config_analysis = self._analyze_project_configuration(path)
        analysis['summary']['configuration'] = config_analysis

        # Collect analyzable files
        files = self._collect_files(path)
        analysis['summary']['project_stats']['total_files'] = len(files)

        # Process each file
        for file_path in files:
            if analyzer := self.analyzers.get(file_path.suffix.lower()):
                try:
                    file_analysis = analyzer.analyze_file(file_path)
                    str_path = str(file_path)

                    # Ensure file_analysis has required fields
                    if not isinstance(file_analysis, dict):
                        print(f"Error analyzing {file_path}: Invalid analysis result")
                        continue

                    if 'type' not in file_analysis:
                        file_analysis['type'] = file_path.suffix.lower().lstrip('.')

                    # Skip files with errors unless they have partial results
                    if 'errors' in file_analysis and not file_analysis.get('metrics', {}).get('loc', 0):
                        print(f"Error analyzing {file_path}: {file_analysis['errors']}")
                        continue

                    # Update file types count
                    ext = file_path.suffix
                    analysis['summary']['project_stats']['by_type'][ext] = \
                        analysis['summary']['project_stats']['by_type'].get(ext, 0) + 1

                    # Store file analysis
                    analysis['files'][str_path] = file_analysis

                    # Update metrics
                    self._update_metrics(analysis, file_analysis, str_path)

                except Exception as e:
                    print(f"Error analyzing {file_path}: {e}")
                    continue

        # Add tree structure generation
        from ..utils.tree import ProjectTree

        # Get excluded paths from analysis
        excluded_paths = set()
        if hasattr(self, '_excluded_paths'):
            excluded_paths = self._excluded_paths

        # Generate tree structure
        tree_generator = ProjectTree(ignore_patterns=[], max_depth=4)
        project_tree = tree_generator.generate_tree(path, excluded_paths)
        summary_tree = tree_generator.generate_summary_tree(path, excluded_paths)

        # Add to analysis structure
        analysis['summary']['structure']['project_tree'] = project_tree
        analysis['summary']['structure']['tree_summary'] = summary_tree

        # Calculate final metrics
        self._calculate_final_metrics(analysis)

        # Generate insights
        if insights_gen := analysis.get('summary', {}).get('insights_generator'):
            analysis['insights'] = insights_gen(analysis)
        else:
            analysis['insights'] = self._generate_default_insights(analysis)

        return AnalysisResult(**analysis)

    def _analyze_package_json(self, path: Path):
        from .config import analyze_package_json
        return analyze_package_json(path / 'package.json')

    def _analyze_tsconfig(self, path: Path):
        from .config import analyze_tsconfig
        return analyze_tsconfig(path / 'tsconfig.json')

    def _analyze_next_config(self, path: Path):
        config_file = path / 'next.config.js'
        if config_file.exists():
            return {'exists': True, 'type': 'next.js config'}
        return None

    def _analyze_tailwind_config(self, path: Path):
        config_file = path / 'tailwind.config.js'
        if config_file.exists():
            return {'exists': True, 'type': 'tailwind config'}
        return None

    def _analyze_pyproject_toml(self, path: Path):
        config_file = path / 'pyproject.toml'
        if config_file.exists():
            try:
                import tomli
                with open(config_file, 'rb') as f:
                    data = tomli.load(f)
                return {'name': data.get('project', {}).get('name'), 'type': 'python project'}
            except Exception:
                return {'error': 'Failed to parse pyproject.toml'}
        return None

    def _analyze_requirements(self, path: Path):
        req_file = path / 'requirements.txt'
        if req_file.exists():
            try:
                with open(req_file, 'r') as f:
                    lines = [line.strip() for line in f if line.strip() and not line.startswith('#')]
                return {'dependencies': len(lines), 'type': 'python requirements'}
            except Exception:
                return {'error': 'Failed to parse requirements.txt'}
        return None

    def _analyze_env_example(self, path: Path):
        env_file = path / '.env.example'
        if env_file.exists():
            try:
                with open(env_file, 'r') as f:
                    lines = [line for line in f if '=' in line and not line.startswith('#')]
                return {'env_vars': len(lines), 'type': 'environment template'}
            except Exception:
                return {'error': 'Failed to parse .env.example'}
        return None

    def _extract_readme_summary(self, path: Path):
        from .config import extract_readme_summary
        return extract_readme_summary(path)

    def _analyze_project_configuration(self, path: Path) -> dict:
        """Analyze project configuration files for additional context."""
        config_files = {
            'package.json': self._analyze_package_json(path),
            'tsconfig.json': self._analyze_tsconfig(path),
            'next.config.js': self._analyze_next_config(path),
            'tailwind.config.js': self._analyze_tailwind_config(path),
            'pyproject.toml': self._analyze_pyproject_toml(path),
            'requirements.txt': self._analyze_requirements(path),
            '.env.example': self._analyze_env_example(path),
            'README.md': self._extract_readme_summary(path)
        }

        # Filter out None values (files that don't exist)
        return {k: v for k, v in config_files.items() if v is not None}

    def _collect_files(self, path: Path) -> List[Path]:
        """Collect all analyzable files from directory."""
        files = []

        for file_path in path.rglob('*'):
            if (file_path.is_file() and
                    file_path.suffix.lower() in self.analyzers):
                files.append(file_path)

        return files

    def _update_metrics(self, analysis: dict, file_analysis: dict, file_path: str) -> None:
        """Update project metrics with file analysis results."""
        metrics = file_analysis.get('metrics', {})

        # Update basic metrics
        analysis['summary']['project_stats']['lines_of_code'] += metrics.get('loc', 0)

        # Update function metrics
        functions = file_analysis.get('functions', [])
        analysis['summary']['code_metrics']['functions']['count'] += len(functions)
        analysis['summary']['code_metrics']['functions']['with_docs'] += \
            sum(1 for f in functions if f.get('docstring'))
        analysis['summary']['code_metrics']['functions']['complex'] += \
            sum(1 for f in functions if f.get('complexity', 0) > 5)

        # Update class metrics
        classes = file_analysis.get('classes', [])
        analysis['summary']['code_metrics']['classes']['count'] += len(classes)
        analysis['summary']['code_metrics']['classes']['with_docs'] += \
            sum(1 for c in classes if c.get('docstring'))

        # Update imports
        imports = file_analysis.get('imports', [])
        analysis['summary']['code_metrics']['imports']['count'] += len(imports)
        analysis['summary']['code_metrics']['imports']['unique'].update(imports)

        # Update structure info
        dir_path = str(Path(file_path).parent)
        analysis['summary']['structure']['directories'].add(dir_path)

        # Update entry points
        if self._is_entry_point(file_path, file_analysis):
            analysis['summary']['structure']['entry_points'].append(file_path)

        # Update core files
        if self._is_core_file(file_analysis):
            analysis['summary']['structure']['core_files'].append(file_path)

        # Update maintenance info
        for todo in file_analysis.get('todos', []):
            analysis['summary']['maintenance']['todos'].append({
                'file': file_path,
                'line': todo.get('line', 0),
                'text': todo.get('text', ''),
                'priority': self._estimate_todo_priority(todo.get('text', ''))
            })

    def _calculate_final_metrics(self, analysis: dict) -> None:
        """Calculate final metrics and handle serialization."""
        total_files = analysis['summary']['project_stats']['total_files']
        if total_files > 0:
            # Calculate average file size
            analysis['summary']['project_stats']['avg_file_size'] = \
                analysis['summary']['project_stats']['lines_of_code'] / total_files

        # Calculate documentation coverage
        total_elements = (
            analysis['summary']['code_metrics']['functions']['count'] +
            analysis['summary']['code_metrics']['classes']['count']
        )
        if total_elements > 0:
            documented = (
                analysis['summary']['code_metrics']['functions']['with_docs'] +
                analysis['summary']['code_metrics']['classes']['with_docs']
            )
            analysis['summary']['maintenance']['doc_coverage'] = \
                (documented / total_elements) * 100

        # Convert sets to lists for serialization
        analysis['summary']['code_metrics']['imports']['unique'] = \
            list(analysis['summary']['code_metrics']['imports']['unique'])
        analysis['summary']['structure']['directories'] = \
            list(analysis['summary']['structure']['directories'])

    def _is_entry_point(self, file_path: str, analysis: dict) -> bool:
        """Identify if a file is a potential entry point."""
        from ..utils import is_potential_entry_point
        return is_potential_entry_point(file_path, analysis)

    def _is_core_file(self, analysis: dict) -> bool:
        """Identify if a file is likely a core component."""
        from ..utils import is_core_file
        return is_core_file(analysis)

    def _estimate_todo_priority(self, text: str) -> str:
        """Estimate TODO priority based on content."""
        from ..utils import estimate_todo_priority
        return estimate_todo_priority(text)

    def _generate_default_insights(self, analysis: dict) -> List[str]:
        """Generate default insights from analysis results."""
        insights = []

        # Basic project stats
        total_files = analysis['summary']['project_stats']['total_files']
        insights.append(f"Project contains {total_files} analyzable files")

        # Documentation insights
        doc_coverage = analysis['summary']['maintenance']['doc_coverage']
        if doc_coverage < 50:
            insights.append(f"Low documentation coverage ({doc_coverage:.1f}%)")
        elif doc_coverage > 80:
            insights.append(f"Good documentation coverage ({doc_coverage:.1f}%)")

        # Complexity insights
        complex_funcs = analysis['summary']['code_metrics']['functions']['complex']
        if complex_funcs > 0:
            insights.append(f"Found {complex_funcs} complex functions that might need attention")

        # TODO insights
        todos = analysis['summary']['maintenance']['todos']
        if todos:
            high_priority = sum(1 for todo in todos if todo['priority'] == 'high')
            if high_priority > 0:
                insights.append(f"Found {high_priority} high-priority TODOs")

        return insights
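

# Illustrative entry point (a sketch, not part of the original module's API):
# analyzes a directory passed on the command line and prints the LLM-friendly
# text report. Because the analyzer and formatter imports are package-relative,
# this only works when executed within the installed package, e.g.
# `python -m llm_code_lens.analyzer.base <path>`.
if __name__ == "__main__":
    import sys

    target = Path(sys.argv[1]) if len(sys.argv) > 1 else Path('.')
    result = ProjectAnalyzer().analyze(target)
    print(result.to_text())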