Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# mysql/reflection.py 

2# Copyright (C) 2005-2020 the SQLAlchemy authors and contributors 

3# <see AUTHORS file> 

4# 

5# This module is part of SQLAlchemy and is released under 

6# the MIT License: http://www.opensource.org/licenses/mit-license.php 

7 

8import re 

9 

10from .enumerated import _EnumeratedValues 

11from .enumerated import SET 

12from .types import DATETIME 

13from .types import TIME 

14from .types import TIMESTAMP 

15from ... import log 

16from ... import types as sqltypes 

17from ... import util 

18 

19 

class ReflectedState(object):
    """Stores raw information about a SHOW CREATE TABLE statement.

    Instances are plain containers: the parser appends to / assigns the
    attributes below while walking the statement line by line.
    """

    def __init__(self):
        # One dict per parsed column definition.
        self.columns = []
        # Table-level options, keyed as "<dialect name>_<option>".
        self.table_options = {}
        # Name taken from the leading CREATE ... TABLE line, if seen.
        self.table_name = None
        # Character set handed to the parser's parse() call.  Declared
        # here so that the attribute exists on every instance, not only
        # on those that have already been through parse().
        self.charset = None
        # Parsed KEY / INDEX specifications.
        self.keys = []
        # Parsed FOREIGN KEY constraint specifications.
        self.fk_constraints = []
        # Parsed CHECK constraint specifications.
        self.ck_constraints = []

30 

31 

@log.class_logger
class MySQLTableDefinitionParser(object):
    """Parses the results of a SHOW CREATE TABLE statement.

    The statement is processed one line at a time.  Each line is
    classified (column, key, constraint, table options, table name) and
    handed to a dedicated ``_parse_*`` method; results are accumulated on
    a :class:`ReflectedState`.

    All regular expressions are compiled once, in :meth:`_prep_regexes`,
    using the quoting characters supplied by ``preparer``.
    """

    def __init__(self, dialect, preparer):
        """Store collaborators and pre-compile all regular expressions.

        :param dialect: object providing ``name`` and ``ischema_names``.
        :param preparer: identifier preparer providing ``initial_quote``,
         ``final_quote``, ``quote_identifier``, ``unformat_identifiers``,
         ``_escape_identifier`` and ``_unescape_identifier``.
        """
        self.dialect = dialect
        self.preparer = preparer
        self._prep_regexes()

    def parse(self, show_create, charset):
        """Parse a complete SHOW CREATE TABLE string.

        :param show_create: the statement text; lines may be separated
         by ``\\n`` or ``\\r\\n``.
        :param charset: stored unchanged on the returned state.
        :return: a populated :class:`ReflectedState`.
        """
        state = ReflectedState()
        state.charset = charset
        initial_quote = self.preparer.initial_quote

        for line in re.split(r"\r?\n", show_create):
            # A column line is indentation followed by a quoted
            # identifier.  One *or more* leading spaces are accepted so
            # that the output of _describe_to_create(), which starts
            # each column line with two spaces, is recognized as well.
            if line.startswith(" ") and line.lstrip(" ").startswith(
                initial_quote
            ):
                self._parse_column(line, state)
            # a regular table options line
            elif line.startswith(") "):
                self._parse_table_options(line, state)
            # an ANSI-mode table options line
            elif line == ")":
                pass
            elif line.startswith("CREATE "):
                self._parse_table_name(line, state)
            # Not present in real reflection, but may be if
            # loading from a file.
            elif not line:
                pass
            else:
                type_, spec = self._parse_constraints(line)
                if type_ is None:
                    util.warn("Unknown schema content: %r" % line)
                elif type_ == "key":
                    state.keys.append(spec)
                elif type_ == "fk_constraint":
                    state.fk_constraints.append(spec)
                elif type_ == "ck_constraint":
                    state.ck_constraints.append(spec)
                # any other recognized type (currently "partition") is
                # deliberately ignored.
        return state

    def _parse_constraints(self, line):
        """Parse a KEY or CONSTRAINT line.

        :param line: A line of SHOW CREATE TABLE output
        :return: a ``(type, spec)`` 2-tuple.  ``type`` is one of
         ``"key"``, ``"fk_constraint"``, ``"ck_constraint"``,
         ``"partition"`` or ``None`` when nothing matched.  ``spec`` is
         the regex group dictionary for the first three types and the
         unmodified line otherwise.
        """

        # KEY
        m = self._re_key.match(line)
        if m:
            spec = m.groupdict()
            # convert columns into name, length pairs
            # NOTE: we may want to consider SHOW INDEX as the
            # format of indexes in MySQL becomes more complex
            spec["columns"] = self._parse_keyexprs(spec["columns"])
            if spec["version_sql"]:
                # a parser named inside a /*!NNNNN ... */ comment
                # overrides one given outside of it
                m2 = self._re_key_version_sql.match(spec["version_sql"])
                if m2 and m2.group("parser"):
                    spec["parser"] = m2.group("parser")
            if spec["parser"]:
                spec["parser"] = self.preparer.unformat_identifiers(
                    spec["parser"]
                )[0]
            return "key", spec

        # FOREIGN KEY CONSTRAINT
        m = self._re_fk_constraint.match(line)
        if m:
            spec = m.groupdict()
            spec["table"] = self.preparer.unformat_identifiers(spec["table"])
            # only the column names are kept; length / ordering are dropped
            spec["local"] = [c[0] for c in self._parse_keyexprs(spec["local"])]
            spec["foreign"] = [
                c[0] for c in self._parse_keyexprs(spec["foreign"])
            ]
            return "fk_constraint", spec

        # CHECK constraint
        m = self._re_ck_constraint.match(line)
        if m:
            return "ck_constraint", m.groupdict()

        # PARTITION and SUBPARTITION
        if self._re_partition.match(line):
            # Punt!
            return "partition", line

        # No match.
        return (None, line)

    def _parse_table_name(self, line, state):
        """Extract the table name.

        ``state.table_name`` is left untouched when the line does not
        match.

        :param line: The first line of SHOW CREATE TABLE
        :param state: the :class:`ReflectedState` to update
        """

        regex, cleanup = self._pr_name
        m = regex.match(line)
        if m:
            state.table_name = cleanup(m.group("name"))

    def _parse_table_options(self, line, state):
        """Build a dictionary of all reflected table-level options.

        Each recognized option is stored in ``state.table_options`` under
        the key ``"<dialect name>_<lower-cased directive>"``.
        ``auto_increment``, ``data directory`` and ``index directory``
        are recognized but not stored.

        :param line: The final line of SHOW CREATE TABLE output.
        :param state: the :class:`ReflectedState` to update
        """

        if not line or line == ")":
            # nothing to reflect
            return

        options = {}
        rest_of_line = line
        for regex, cleanup in self._pr_options:
            m = regex.search(rest_of_line)
            if not m:
                continue
            directive, value = m.group("directive"), m.group("val")
            if cleanup:
                value = cleanup(value)
            options[directive.lower()] = value
            # remove what was consumed so that later, looser patterns
            # do not match inside it (e.g. inside a quoted string)
            rest_of_line = regex.sub("", rest_of_line)

        for nope in ("auto_increment", "data directory", "index directory"):
            options.pop(nope, None)

        for opt, val in options.items():
            state.table_options["%s_%s" % (self.dialect.name, opt)] = val

    def _parse_column(self, line, state):
        """Extract column details.

        Falls back to a 'minimal support' variant if full parse fails.
        A warning is emitted when the fallback is used, when the line
        cannot be parsed at all (in which case nothing is recorded), and
        when the column type is not present in
        ``dialect.ischema_names``.

        On success a dictionary with at least the keys ``name``,
        ``type``, ``default``, ``comment`` and ``nullable`` is appended
        to ``state.columns``.

        :param line: Any column-bearing line from SHOW CREATE TABLE
        :param state: the :class:`ReflectedState` to update
        """

        spec = None
        m = self._re_column.match(line)
        if m:
            spec = m.groupdict()
            spec["full"] = True
        else:
            m = self._re_column_loose.match(line)
            if m:
                spec = m.groupdict()
                spec["full"] = False
        if not spec:
            util.warn("Unknown column definition %r" % line)
            return
        if not spec["full"]:
            util.warn("Incomplete reflection of column definition %r" % line)

        name, type_, args = spec["name"], spec["coltype"], spec["arg"]

        try:
            col_type = self.dialect.ischema_names[type_]
        except KeyError:
            util.warn(
                "Did not recognize type '%s' of column '%s'" % (type_, name)
            )
            col_type = sqltypes.NullType

        # Column type positional arguments eg. varchar(32)
        if args is None or args == "":
            type_args = []
        elif args[0] == "'" and args[-1] == "'":
            # quoted values; each item keeps its surrounding quotes
            type_args = self._re_csv_str.findall(args)
        else:
            type_args = [int(v) for v in self._re_csv_int.findall(args)]

        # Column type keyword options
        type_kw = {}

        if issubclass(col_type, (DATETIME, TIME, TIMESTAMP)):
            # the single numeric argument of these types is passed by
            # keyword rather than positionally
            if type_args:
                type_kw["fsp"] = type_args.pop(0)

        for kw in ("unsigned", "zerofill"):
            if spec.get(kw, False):
                type_kw[kw] = True
        for kw in ("charset", "collate"):
            if spec.get(kw, False):
                type_kw[kw] = spec[kw]
        if issubclass(col_type, _EnumeratedValues):
            type_args = _EnumeratedValues._strip_values(type_args)

            if issubclass(col_type, SET) and "" in type_args:
                type_kw["retrieve_as_bitwise"] = True

        type_instance = col_type(*type_args, **type_kw)

        col_kw = {}

        # NOT NULL
        col_kw["nullable"] = True
        # this can be "NULL" in the case of TIMESTAMP
        if spec.get("notnull", False) == "NOT NULL":
            col_kw["nullable"] = False

        # AUTO_INCREMENT
        if spec.get("autoincr", False):
            col_kw["autoincrement"] = True
        elif issubclass(col_type, sqltypes.Integer):
            col_kw["autoincrement"] = False

        # DEFAULT
        default = spec.get("default", None)

        if default == "NULL":
            # eliminates the need to deal with this later.
            default = None

        comment = spec.get("comment", None)

        if comment is not None:
            # undo backslash and single-quote doubling
            comment = comment.replace("\\\\", "\\").replace("''", "'")

        sqltext = spec.get("generated")
        if sqltext is not None:
            computed = dict(sqltext=sqltext)
            persisted = spec.get("persistence")
            if persisted is not None:
                computed["persisted"] = persisted == "STORED"
            col_kw["computed"] = computed

        col_d = dict(
            name=name, type=type_instance, default=default, comment=comment
        )
        col_d.update(col_kw)
        state.columns.append(col_d)

    def _describe_to_create(self, table_name, columns):
        """Re-format DESCRIBE output as a SHOW CREATE TABLE string.

        DESCRIBE is a much simpler reflection and is sufficient for
        reflecting views for runtime use.  This method formats DDL
        for columns only- keys are omitted.

        :param table_name: name placed, quoted, in the CREATE TABLE line.
        :param columns: A sequence of DESCRIBE or SHOW COLUMNS 6-tuples.
          SHOW FULL COLUMNS FROM rows must be rearranged for use with
          this function.  Positions 0, 1, 2, 4 and 5 of each row are read
          as name, type, nullable, default and extra.
        :return: the generated statement as a single string.
        """

        buffer = []
        for row in columns:
            name, col_type, nullable, default, extra = [
                row[i] for i in (0, 1, 2, 4, 5)
            ]

            # the leading " " element, once joined, yields a line that
            # begins with two spaces
            line = [" ", self.preparer.quote_identifier(name), col_type]

            # NOTE(review): this is a plain truthiness test; if callers
            # pass non-empty strings such as 'YES' / 'NO' here it never
            # fires -- confirm against the caller.
            if not nullable:
                line.append("NOT NULL")

            if default and "auto_increment" not in default:
                verbatim = (
                    col_type.startswith("timestamp")
                    and default.startswith("C")
                ) or default == "NULL"
                line.append("DEFAULT")
                if verbatim:
                    line.append(default)
                else:
                    # everything else is rendered as a quoted string
                    line.append("'%s'" % default.replace("'", "''"))

            if extra:
                line.append(extra)

            buffer.append(" ".join(line))

        return "".join(
            [
                (
                    "CREATE TABLE %s (\n"
                    % self.preparer.quote_identifier(table_name)
                ),
                ",\n".join(buffer),
                "\n) ",
            ]
        )

    def _parse_keyexprs(self, identifiers):
        """Unpack '"col"(2),"col" ASC'-ish strings into components.

        :return: a list of ``(name, length, ordering)`` 3-tuples of
         strings; ``length`` and ``ordering`` are empty when absent.
        """

        return self._re_keyexprs.findall(identifiers)

    def _prep_regexes(self):
        """Pre-compile regular expressions.

        Line-level patterns accept one or more leading spaces, matching
        both single-space indentation and the two-space indentation
        produced by :meth:`_describe_to_create`.
        """

        # not referenced by any method of this class; kept because it is
        # an instance attribute that other code may read
        self._re_columns = []
        self._pr_options = []

        _final = self.preparer.final_quote

        # regex-escaped initial quote, final quote and escaped final quote
        quotes = dict(
            zip(
                ("iq", "fq", "esc_fq"),
                [
                    re.escape(s)
                    for s in (
                        self.preparer.initial_quote,
                        _final,
                        self.preparer._escape_identifier(_final),
                    )
                ],
            )
        )

        self._pr_name = _pr_compile(
            r"^CREATE (?:\w+ +)?TABLE +"
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +\($" % quotes,
            self.preparer._unescape_identifier,
        )

        # `col`,`col2`(32),`col3`(15) DESC
        #
        self._re_keyexprs = _re_compile(
            r"(?:"
            r"(?:%(iq)s((?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)"
            r"(?:\((\d+)\))?(?: +(ASC|DESC))?(?=\,|$))+" % quotes
        )

        # 'foo' or 'foo','bar' or 'fo,o','ba''a''r'
        self._re_csv_str = _re_compile(r"\x27(?:\x27\x27|[^\x27])*\x27")

        # 123 or 123,456
        self._re_csv_int = _re_compile(r"\d+")

        # `colname` <type> [type opts]
        #  (NOT NULL | NULL)
        #   DEFAULT ('value' | CURRENT_TIMESTAMP...)
        #   COMMENT 'comment'
        #  COLUMN_FORMAT (FIXED|DYNAMIC|DEFAULT)
        #  STORAGE (DISK|MEMORY)
        self._re_column = _re_compile(
            r" +"
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"(?P<coltype>\w+)"
            r"(?:\((?P<arg>(?:\d+|\d+,\d+|"
            r"(?:'(?:''|[^'])*',?)+))\))?"
            r"(?: +(?P<unsigned>UNSIGNED))?"
            r"(?: +(?P<zerofill>ZEROFILL))?"
            r"(?: +CHARACTER SET +(?P<charset>[\w_]+))?"
            r"(?: +COLLATE +(?P<collate>[\w_]+))?"
            r"(?: +(?P<notnull>(?:NOT )?NULL))?"
            r"(?: +DEFAULT +(?P<default>"
            r"(?:NULL|'(?:''|[^'])*'|[\w\(\)]+"
            r"(?: +ON UPDATE [\w\(\)]+)?)"
            r"))?"
            r"(?: +(?:GENERATED ALWAYS)? ?AS +(?P<generated>\("
            r".*\))? ?(?P<persistence>VIRTUAL|STORED)?)?"
            r"(?: +(?P<autoincr>AUTO_INCREMENT))?"
            r"(?: +COMMENT +'(?P<comment>(?:''|[^'])*)')?"
            r"(?: +COLUMN_FORMAT +(?P<colfmt>\w+))?"
            r"(?: +STORAGE +(?P<storage>\w+))?"
            r"(?: +(?P<extra>.*))?"
            r",?$" % quotes
        )

        # Fallback, try to parse as little as possible
        # NOTE(review): the trailing lazy ".*?" followed by an optional
        # group means "notnull" looks capturable only directly after the
        # type -- confirm whether that is intended.
        self._re_column_loose = _re_compile(
            r" +"
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"(?P<coltype>\w+)"
            r"(?:\((?P<arg>(?:\d+|\d+,\d+|\x27(?:\x27\x27|[^\x27])+\x27))\))?"
            r".*?(?P<notnull>(?:NOT )NULL)?" % quotes
        )

        # (PRIMARY|UNIQUE|FULLTEXT|SPATIAL) INDEX `name` (USING (BTREE|HASH))?
        # (`col` (ASC|DESC)?, `col` (ASC|DESC)?)
        # KEY_BLOCK_SIZE size | WITH PARSER name  /*!50100 WITH PARSER name */
        self._re_key = _re_compile(
            r" +"
            r"(?:(?P<type>\S+) )?KEY"
            r"(?: +%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)?"
            r"(?: +USING +(?P<using_pre>\S+))?"
            r" +\((?P<columns>.+?)\)"
            r"(?: +USING +(?P<using_post>\S+))?"
            r"(?: +KEY_BLOCK_SIZE *[ =]? *(?P<keyblock>\S+))?"
            r"(?: +WITH PARSER +(?P<parser>\S+))?"
            r"(?: +COMMENT +(?P<comment>(\x27\x27|\x27([^\x27])*?\x27)+))?"
            r"(?: +/\*(?P<version_sql>.+)\*/ +)?"
            r",?$" % quotes
        )

        # https://forums.mysql.com/read.php?20,567102,567111#msg-567111
        # It means if the MySQL version >= \d+, execute what's in the comment
        self._re_key_version_sql = _re_compile(
            r"\!\d+ " r"(?: *WITH PARSER +(?P<parser>\S+) *)?"
        )

        # CONSTRAINT `name` FOREIGN KEY (`local_col`)
        # REFERENCES `remote` (`remote_col`)
        # MATCH FULL | MATCH PARTIAL | MATCH SIMPLE
        # ON DELETE CASCADE ON UPDATE RESTRICT
        #
        # unique constraints come back as KEYs
        kw = quotes.copy()
        # "NO ACTION" is the spelling used in the DDL; the pattern has no
        # end-of-line anchor, so an unlisted action would not prevent a
        # match -- it would just leave ondelete / onupdate as None.
        # "NOACTION" is retained for backward compatibility.
        kw["on"] = "RESTRICT|CASCADE|SET NULL|NO ACTION|NOACTION"
        self._re_fk_constraint = _re_compile(
            r" +"
            r"CONSTRAINT +"
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"FOREIGN KEY +"
            r"\((?P<local>[^\)]+?)\) REFERENCES +"
            r"(?P<table>%(iq)s[^%(fq)s]+%(fq)s"
            r"(?:\.%(iq)s[^%(fq)s]+%(fq)s)?) +"
            r"\((?P<foreign>[^\)]+?)\)"
            r"(?: +(?P<match>MATCH \w+))?"
            r"(?: +ON DELETE (?P<ondelete>%(on)s))?"
            r"(?: +ON UPDATE (?P<onupdate>%(on)s))?" % kw
        )

        # CONSTRAINT `CONSTRAINT_1` CHECK (`x` > 5)'
        # testing on MariaDB 10.2 shows that the CHECK constraint
        # is returned on a line by itself, so to match without worrying
        # about parenthesis in the expression we go to the end of the line
        self._re_ck_constraint = _re_compile(
            r" +"
            r"CONSTRAINT +"
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"CHECK +"
            r"\((?P<sqltext>.+)\),?" % kw
        )

        # PARTITION
        #
        # punt!
        self._re_partition = _re_compile(r"(?:.*)(?:SUB)?PARTITION(?:.*)")

        # Table-level options (COLLATE, ENGINE, etc.)
        # Do the string options first, since they have quoted
        # strings we need to get rid of.
        for option in _options_of_type_string:
            self._add_option_string(option)

        for option in (
            "ENGINE",
            "TYPE",
            "AUTO_INCREMENT",
            "AVG_ROW_LENGTH",
            "CHARACTER SET",
            "DEFAULT CHARSET",
            "CHECKSUM",
            "COLLATE",
            "DELAY_KEY_WRITE",
            "INSERT_METHOD",
            "MAX_ROWS",
            "MIN_ROWS",
            "PACK_KEYS",
            "ROW_FORMAT",
            "KEY_BLOCK_SIZE",
        ):
            self._add_option_word(option)

        self._add_option_regex("UNION", r"\([^\)]+\)")
        self._add_option_regex("TABLESPACE", r".*? STORAGE DISK")
        self._add_option_regex(
            "RAID_TYPE",
            r"\w+\s+RAID_CHUNKS\s*\=\s*\w+RAID_CHUNKSIZE\s*=\s*\w+",
        )

    # separator between a table option and its value: "=" with optional
    # surrounding whitespace, or whitespace alone
    _optional_equals = r"(?:\s*(?:=\s*)|\s+)"

    def _add_option_string(self, directive):
        """Register a table option whose value is a single-quoted string.

        The captured value has doubled backslashes and doubled single
        quotes collapsed before it is stored.
        """
        regex = r"(?P<directive>%s)%s" r"'(?P<val>(?:[^']|'')*?)'(?!')" % (
            re.escape(directive),
            self._optional_equals,
        )
        self._pr_options.append(
            _pr_compile(
                regex, lambda v: v.replace("\\\\", "\\").replace("''", "'")
            )
        )

    def _add_option_word(self, directive):
        """Register a table option whose value is a single ``\\w+`` word."""
        regex = r"(?P<directive>%s)%s" r"(?P<val>\w+)" % (
            re.escape(directive),
            self._optional_equals,
        )
        self._pr_options.append(_pr_compile(regex))

    def _add_option_regex(self, directive, regex):
        """Register a table option whose value matches ``regex``.

        :param directive: literal option name; it is regex-escaped here.
        :param regex: pattern source for the value, inserted verbatim.
        """
        regex = r"(?P<directive>%s)%s" r"(?P<val>%s)" % (
            re.escape(directive),
            self._optional_equals,
            regex,
        )
        self._pr_options.append(_pr_compile(regex))

527 

528 

# Table options whose values are single-quoted strings.  These are
# registered ahead of the bare-word options when the parser builds its
# option patterns.
_options_of_type_string = (
    "COMMENT", "DATA DIRECTORY", "INDEX DIRECTORY", "PASSWORD", "CONNECTION"
)

536 

537 

def _pr_compile(regex, cleanup=None):
    """Pair a compiled pattern with an optional post-processing callable.

    :param regex: pattern source, compiled via ``_re_compile``.
    :param cleanup: callable stored alongside the pattern, or ``None``.
    :return: a ``(compiled pattern, cleanup)`` 2-tuple.
    """

    compiled = _re_compile(regex)
    return compiled, cleanup

542 

543 

def _re_compile(regex):
    """Return ``regex`` compiled case-insensitively with UNICODE set."""

    flags = re.IGNORECASE | re.UNICODE
    return re.compile(regex, flags)