Coverage for src\llm_code_lens\analyzer\sql.py: 11%

175 statements  

coverage.py v7.7.0, created at 2025-05-25 12:07 +0300

import re
import os
import pyodbc
from pathlib import Path
from typing import Dict, List, Optional

class SQLServerAnalyzer:
    """SQL Server code analyzer for stored procedures, views, and functions."""

    def __init__(self):
        self.conn = None
        self.cursor = None

    def connect(self, connection_string: Optional[str] = None) -> None:
        """
        Connect to SQL Server using either a provided connection string or environment variables.

        Args:
            connection_string: Optional connection string. If not provided, uses environment variables.
        """
        try:
            if connection_string:
                self.conn = pyodbc.connect(connection_string)
            else:
                # Use environment variables
                server = os.getenv('MSSQL_SERVER')
                username = os.getenv('MSSQL_USERNAME')
                password = os.getenv('MSSQL_PASSWORD')

                if not server:
                    raise ValueError("No server specified. Provide connection string or set MSSQL_SERVER environment variable")

                # Build connection string
                conn_str = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server}'
                if username and password:
                    conn_str += f';UID={username};PWD={password}'
                else:
                    conn_str += ';Trusted_Connection=yes'

                self.conn = pyodbc.connect(conn_str)

            self.cursor = self.conn.cursor()

        except Exception as e:
            raise ConnectionError(f"Failed to connect to SQL Server: {str(e)}")

    def list_databases(self) -> List[str]:
        """List all accessible non-system databases."""
        if not self.cursor:
            raise ConnectionError("Not connected to SQL Server")

        self.cursor.execute("SELECT name FROM sys.databases WHERE database_id > 4")  # Skip system DBs
        return [row.name for row in self.cursor.fetchall()]

    def analyze_database(self, database: str) -> Dict:
        """
        Analyze a specific database.

        Args:
            database: Name of the database to analyze

        Returns:
            Dict containing analysis of stored procedures, views, and functions
        """
        if not self.cursor:
            raise ConnectionError("Not connected to SQL Server")

        # Switch to specified database
        self.cursor.execute(f"USE [{database}]")

        return {
            'stored_procedures': self._analyze_stored_procedures(),
            'views': self._analyze_views(),
            'functions': self._analyze_functions()
        }

    def _analyze_stored_procedures(self) -> List[Dict]:
        """Analyze stored procedures in the current database."""
        self.cursor.execute("""
            SELECT
                OBJECT_SCHEMA_NAME(p.object_id) as schema_name,
                p.name,
                m.definition,
                p.create_date,
                p.modify_date
            FROM sys.procedures p
            INNER JOIN sys.sql_modules m ON p.object_id = m.object_id
            ORDER BY schema_name, p.name
        """)

        procedures = []
        for row in self.cursor.fetchall():
            proc_def = row.definition

            # Analyze the procedure
            proc_analysis = {
                'schema': row.schema_name,
                'name': row.name,
                'definition': proc_def,
                'metrics': {
                    'lines': len(proc_def.splitlines()),
                    'complexity': self._estimate_complexity(proc_def)
                },
                'parameters': self._extract_parameters(proc_def),
                'dependencies': self._extract_dependencies(proc_def),
                'todos': [],
                'comments': []
            }

            # Extract comments and TODOs
            comments, todos = self._extract_comments_and_todos(proc_def)
            proc_analysis['comments'] = comments
            proc_analysis['todos'] = todos

            procedures.append(proc_analysis)

        return procedures

    def _analyze_views(self) -> List[Dict]:
        """Analyze views in the current database."""
        self.cursor.execute("""
            SELECT
                OBJECT_SCHEMA_NAME(v.object_id) as schema_name,
                v.name,
                m.definition,
                v.create_date,
                v.modify_date
            FROM sys.views v
            INNER JOIN sys.sql_modules m ON v.object_id = m.object_id
            ORDER BY schema_name, v.name
        """)

        views = []
        for row in self.cursor.fetchall():
            view_def = row.definition

            # Analyze the view
            view_analysis = {
                'schema': row.schema_name,
                'name': row.name,
                'definition': view_def,
                'metrics': {
                    'lines': len(view_def.splitlines()),
                    'complexity': self._estimate_complexity(view_def)
                },
                'dependencies': self._extract_dependencies(view_def),
                'todos': [],
                'comments': []
            }

            # Extract comments and TODOs
            comments, todos = self._extract_comments_and_todos(view_def)
            view_analysis['comments'] = comments
            view_analysis['todos'] = todos

            views.append(view_analysis)

        return views

    def _analyze_functions(self) -> List[Dict]:
        """Analyze functions in the current database."""
        self.cursor.execute("""
            SELECT
                OBJECT_SCHEMA_NAME(f.object_id) as schema_name,
                f.name,
                m.definition,
                f.create_date,
                f.modify_date,
                f.type
            FROM sys.objects f
            INNER JOIN sys.sql_modules m ON f.object_id = m.object_id
            WHERE f.type IN ('FN', 'IF', 'TF') -- scalar, inline table-valued, multi-statement table-valued
            ORDER BY schema_name, f.name
        """)

        functions = []
        for row in self.cursor.fetchall():
            func_def = row.definition

            # Analyze the function
            func_analysis = {
                'schema': row.schema_name,
                'name': row.name,
                'definition': func_def,
                'metrics': {
                    'lines': len(func_def.splitlines()),
                    'complexity': self._estimate_complexity(func_def)
                },
                'parameters': self._extract_parameters(func_def),
                'dependencies': self._extract_dependencies(func_def),
                'todos': [],
                'comments': []
            }

            # Extract comments and TODOs
            comments, todos = self._extract_comments_and_todos(func_def)
            func_analysis['comments'] = comments
            func_analysis['todos'] = todos

            functions.append(func_analysis)

        return functions

    def analyze_file(self, file_path: Path) -> dict:
        """Analyze a SQL file."""
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        analysis = {
            'type': 'sql',
            'metrics': {
                'loc': len(content.splitlines()),
                'complexity': self._estimate_complexity(content)
            },
            'objects': [],
            'parameters': [],
            'comments': [],
            'todos': [],
            'dependencies': self._extract_dependencies(content)
        }

        # Extract SQL objects
        objects = self._extract_sql_objects(content)
        if objects:
            analysis['objects'] = objects

        # Extract parameters with comments
        params = self._extract_parameters(content)
        if params:
            analysis['parameters'] = params

        # Extract comments and TODOs
        comments, todos = self._extract_comments_and_todos(content)
        analysis['comments'] = comments
        analysis['todos'] = todos

        return analysis

    def __del__(self):
        """Clean up database connections."""
        if self.cursor:
            self.cursor.close()
        if self.conn:
            self.conn.close()

    # Rest of the existing methods remain the same

    def _extract_sql_objects(self, content: str) -> List[dict]:
        """Extract SQL objects like procedures, functions, and views."""
        objects = []

        # Match CREATE/ALTER statements
        patterns = {
            'procedure': r'CREATE\s+(?:OR\s+ALTER\s+)?PROCEDURE\s+([^\s]+)',
            'function': r'CREATE\s+(?:OR\s+ALTER\s+)?FUNCTION\s+([^\s]+)',
            'view': r'CREATE\s+(?:OR\s+ALTER\s+)?VIEW\s+([^\s]+)'
        }

        for obj_type, pattern in patterns.items():
            matches = re.finditer(pattern, content, re.IGNORECASE)
            for match in matches:
                name = match.group(1)
                # Find the object's body
                start_pos = match.start()
                # Look for GO or end of file
                end_match = re.search(r'\bGO\b', content[start_pos:], re.IGNORECASE)
                if end_match:
                    end_pos = start_pos + end_match.start()
                    definition = content[start_pos:end_pos].strip()
                else:
                    definition = content[start_pos:].strip()

                objects.append({
                    'type': obj_type,
                    'name': name,
                    'definition': definition,
                    'loc': len(definition.splitlines()),
                    'complexity': self._estimate_complexity(definition)
                })

        return objects

    def _extract_parameters(self, content: str) -> List[dict]:
        """Extract parameters from procedure or function definitions."""
        params = []
        # Find the procedure declaration
        proc_match = re.search(
            r'CREATE\s+(?:OR\s+ALTER\s+)?(?:PROCEDURE|FUNCTION)\s+([^\s]+)([\s\S]+?)AS\b',
            content,
            re.IGNORECASE
        )

        if proc_match:
            param_section = proc_match.group(2)
            # Extract each parameter line, handling multiline declarations
            param_lines = re.findall(
                r'@\w+\s+[^,@]+(?:\s*=\s*[^,]+)?(?=\s*,|\s*AS\b|\s*$)',
                param_section,
                re.IGNORECASE | re.DOTALL
            )

            for param_line in param_lines:
                # Extract individual parameter components
                param_match = re.match(
                    r'@(\w+)\s+([^=\s]+(?:\([^)]*\))?)\s*(?:=\s*([^,\s][^,]*)?)?',
                    param_line.strip()
                )

                if param_match:
                    name, data_type, default = param_match.groups()
                    param_info = {
                        'name': name,
                        'data_type': data_type.strip()
                    }

                    if default:
                        param_info['default'] = default.strip()

                    # Look for inline comment on the same line
                    comment_match = re.search(r'--\s*(.*?)(?:\r?\n|$)', param_line)
                    if comment_match:
                        param_info['description'] = comment_match.group(1).strip()

                    params.append(param_info)

        # Update parameter documentation from nearby comments
        lines = content.splitlines()
        for i, line in enumerate(lines):
            if '--' in line and any(param['name'] in line for param in params):
                comment = line[line.index('--') + 2:].strip()
                param_name = next(
                    (param['name'] for param in params if param['name'] in line),
                    None
                )
                if param_name:
                    param = next(p for p in params if p['name'] == param_name)
                    if 'description' not in param:
                        param['description'] = comment

        return params

    def _extract_dependencies(self, content: str) -> List[str]:
        """Extract table and view dependencies."""
        deps = set()

        # Define patterns for table references
        patterns = [
            # FROM, JOIN, UPDATE, etc. followed by table name
            r'(?:FROM|JOIN|INTO|UPDATE)\s+([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)?)\b(?!\s*[=@])',
            # INSERT INTO pattern
            r'INSERT\s+INTO\s+([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)?)\b',
            # REFERENCES in constraints
            r'REFERENCES\s+([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)?)\b'
        ]

        # Define words that should not be treated as table names
        excluded_words = {
            'null', 'select', 'where', 'group', 'order', 'having',
            'exists', 'between', 'like', 'in', 'is', 'not', 'and', 'or',
            'operation', 'existing'  # common identifiers that are not tables
        }

        for pattern in patterns:
            for match in re.finditer(pattern, content, re.IGNORECASE):
                table_name = match.group(1).strip()
                if table_name.lower() not in excluded_words:
                    deps.add(table_name)

        return sorted(list(deps))

    def _extract_comments_and_todos(self, content: str) -> tuple:
        """Extract comments and TODOs from SQL code."""
        comments = []
        todos = []

        # Match inline comments and block comments
        patterns = [
            (r'--([^\n]+)', False),    # Inline comments
            (r'/\*[\s\S]*?\*/', True)  # Block comments
        ]

        for pattern, is_multiline in patterns:
            for match in re.finditer(pattern, content):
                comment = match.group()
                if is_multiline:
                    comment = comment.strip('/*').strip('*/')
                else:
                    comment = comment.strip('--')
                comment = comment.strip()

                # Skip empty comments and parameter comments
                if not comment or comment.startswith('@'):
                    continue

                line_num = content[:match.start()].count('\n') + 1

                if any(marker in comment.upper()
                       for marker in ['TODO', 'FIXME', 'XXX']):
                    todos.append({
                        'text': comment,
                        'line': line_num
                    })
                else:
                    comments.append({
                        'text': comment,
                        'line': line_num
                    })

        return comments, todos

    def _estimate_complexity(self, content: str) -> int:
        """Estimate SQL complexity based on various factors."""
        complexity = 0
        content_lower = content.lower()

        # Control flow complexity
        complexity += content_lower.count('if ') * 2
        complexity += content_lower.count('else ') * 2
        complexity += content_lower.count('case ') * 2
        complexity += content_lower.count('while ') * 3
        complexity += content_lower.count('cursor') * 4

        # Query complexity
        complexity += content_lower.count('join ') * 2
        complexity += content_lower.count('where ') * 2
        complexity += content_lower.count('group by ') * 2
        complexity += content_lower.count('having ') * 3
        complexity += content_lower.count('union ') * 3

        # Transaction complexity
        complexity += content_lower.count('transaction') * 2
        complexity += content_lower.count('try') * 2
        complexity += content_lower.count('catch') * 2

        return complexity
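
For reference, a minimal usage sketch follows. It is not part of the measured module: it assumes the package is importable as llm_code_lens.analyzer.sql, that a SQL Server instance is reachable (or that the MSSQL_SERVER/MSSQL_USERNAME/MSSQL_PASSWORD environment variables used by connect() are set), and the database name and file path below are placeholders.

# Illustrative usage sketch (assumptions noted above; names below are placeholders).
from pathlib import Path

from llm_code_lens.analyzer.sql import SQLServerAnalyzer

analyzer = SQLServerAnalyzer()
analyzer.connect()  # or analyzer.connect("DRIVER={ODBC Driver 17 for SQL Server};SERVER=...;UID=...;PWD=...")

print(analyzer.list_databases())

report = analyzer.analyze_database('SomeDatabase')
for proc in report['stored_procedures']:
    print(proc['schema'], proc['name'], proc['metrics']['complexity'])

# Standalone .sql files can be analyzed without a live connection:
file_report = analyzer.analyze_file(Path('migrations/001_create_tables.sql'))
print(file_report['metrics'], file_report['dependencies'])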