sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
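
    # A minimal usage sketch (illustrative; in practice `sqlglot.parse_one` wires the
    # tokenizer and parser together through a Dialect):
    #
    #   >>> from sqlglot.dialects.dialect import Dialect
    #   >>> dialect = Dialect.get_or_raise("duckdb")
    #   >>> tokens = dialect.tokenize("SELECT 1")
    #   >>> expression = dialect.parser().parse(tokens, "SELECT 1")[0]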

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }
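
    # Dialect parsers customize class-level tables like FUNCTIONS by subclassing;
    # a sketch with a hypothetical dialect-specific builder:
    #
    #   class MyParser(Parser):
    #       FUNCTIONS = {
    #           **Parser.FUNCTIONS,
    #           "FOO": lambda args: exp.Anonymous(this="FOO", expressions=args),
    #       }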

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }
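
    # ID_VAR_TOKENS (below) lets many "soft" keywords double as identifiers, e.g.
    # `SELECT 1 AS filter` parses even though FILTER is a keyword (sketch, default dialect):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT 1 AS filter").sql()
    #   'SELECT 1 AS filter'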

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()
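
    # LAMBDAS (below) parses higher-order function arguments such as `x -> x + 1`
    # (sketch; DuckDB is one dialect with this syntax):
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one(
    #   ...     "SELECT LIST_TRANSFORM([1, 2], x -> x + 1)", read="duckdb"
    #   ... ).find(exp.Lambda)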

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
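
    # EXPRESSION_PARSERS (above) backs `parse_into`, which parses a token stream into a
    # specific node type (sketch):
    #
    #   >>> from sqlglot import exp, tokenize
    #   >>> Parser().parse_into(exp.Where, tokenize("WHERE x > 1"))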

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }
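
    # PLACEHOLDER_PARSERS (above) covers parameter markers like `?` and `:name` (sketch):
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT * FROM t WHERE id = :id").find(exp.Placeholder)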

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
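
    # Range predicates attach to an already-parsed left-hand side, e.g. BETWEEN / IN /
    # LIKE (sketch):
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT * FROM t WHERE x BETWEEN 1 AND 2").find(exp.Between)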

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
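
    # Properties appear mostly in DDL, e.g. `CREATE TABLE t (x INT) ENGINE=MergeTree`
    # yields an exp.EngineProperty via the table above (sketch):
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one(
    #   ...     "CREATE TABLE t (x INT) ENGINE=MergeTree", read="clickhouse"
    #   ... ).find(exp.EngineProperty)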

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
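
    # FUNCTION_PARSERS (below) handles functions whose arguments are not a plain CSV
    # list, e.g. `CAST(x AS INT)` or `EXTRACT(YEAR FROM d)` (sketch):
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT CAST(x AS INT)").find(exp.Cast).to.sql()
    #   'INT'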
924 "JSON_OBJECT": lambda self: self._parse_json_object(), 925 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 926 "JSON_TABLE": lambda self: self._parse_json_table(), 927 "MATCH": lambda self: self._parse_match_against(), 928 "OPENJSON": lambda self: self._parse_open_json(), 929 "POSITION": lambda self: self._parse_position(), 930 "PREDICT": lambda self: self._parse_predict(), 931 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 932 "STRING_AGG": lambda self: self._parse_string_agg(), 933 "SUBSTRING": lambda self: self._parse_substring(), 934 "TRIM": lambda self: self._parse_trim(), 935 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 936 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 937 } 938 939 QUERY_MODIFIER_PARSERS = { 940 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 941 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 942 TokenType.WHERE: lambda self: ("where", self._parse_where()), 943 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 944 TokenType.HAVING: lambda self: ("having", self._parse_having()), 945 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 946 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 947 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 948 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 949 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 950 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 951 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 952 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 953 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 954 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 955 TokenType.CLUSTER_BY: lambda self: ( 956 "cluster", 957 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 958 ), 959 TokenType.DISTRIBUTE_BY: lambda self: ( 960 "distribute", 961 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 962 ), 963 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 964 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 965 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 966 } 967 968 SET_PARSERS = { 969 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 970 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 971 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 972 "TRANSACTION": lambda self: self._parse_set_transaction(), 973 } 974 975 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 976 977 TYPE_LITERAL_PARSERS = { 978 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 979 } 980 981 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 982 983 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 984 985 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 986 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 987 "ISOLATION": ( 988 ("LEVEL", "REPEATABLE", "READ"), 989 ("LEVEL", "READ", "COMMITTED"), 990 ("LEVEL", "READ", "UNCOMITTED"), 991 ("LEVEL", "SERIALIZABLE"), 992 ), 993 "READ": ("WRITE", "ONLY"), 994 } 995 996 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 997 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 998 ) 999 
CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1000 1001 CREATE_SEQUENCE: OPTIONS_TYPE = { 1002 "SCALE": ("EXTEND", "NOEXTEND"), 1003 "SHARD": ("EXTEND", "NOEXTEND"), 1004 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1005 **dict.fromkeys( 1006 ( 1007 "SESSION", 1008 "GLOBAL", 1009 "KEEP", 1010 "NOKEEP", 1011 "ORDER", 1012 "NOORDER", 1013 "NOCACHE", 1014 "CYCLE", 1015 "NOCYCLE", 1016 "NOMINVALUE", 1017 "NOMAXVALUE", 1018 "NOSCALE", 1019 "NOSHARD", 1020 ), 1021 tuple(), 1022 ), 1023 } 1024 1025 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1026 1027 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1028 1029 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1030 1031 CLONE_KEYWORDS = {"CLONE", "COPY"} 1032 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1033 1034 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1035 1036 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1037 1038 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1039 1040 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1041 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1042 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1043 1044 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1045 1046 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1047 1048 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1049 1050 DISTINCT_TOKENS = {TokenType.DISTINCT} 1051 1052 NULL_TOKENS = {TokenType.NULL} 1053 1054 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1055 1056 STRICT_CAST = True 1057 1058 PREFIXED_PIVOT_COLUMNS = False 1059 IDENTIFY_PIVOT_STRINGS = False 1060 1061 LOG_DEFAULTS_TO_LN = False 1062 1063 # Whether ADD is present for each column added by ALTER TABLE 1064 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1065 1066 # Whether the table sample clause expects CSV syntax 1067 TABLESAMPLE_CSV = False 1068 1069 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1070 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1071 1072 # Whether the TRIM function expects the characters to trim as its first argument 1073 TRIM_PATTERN_FIRST = False 1074 1075 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1076 STRING_ALIASES = False 1077 1078 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1079 MODIFIERS_ATTACHED_TO_UNION = True 1080 UNION_MODIFIERS = {"order", "limit", "offset"} 1081 1082 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1083 NO_PAREN_IF_COMMANDS = True 1084 1085 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1086 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1087 1088 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1089 # If this is True and '(' is not found, the keyword will be treated as an identifier 1090 VALUES_FOLLOWED_BY_PAREN = True 1091 1092 # Whether implicit unnesting is supported, e.g. 

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
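
    # Sketch: `parse` expects tokens from a tokenizer configured for the same dialect,
    # and returns one tree per semicolon-separated statement:
    #
    #   >>> from sqlglot import tokenize
    #   >>> Parser().parse(tokenize("SELECT 1; SELECT 2"))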
1175 """ 1176 errors = [] 1177 for expression_type in ensure_list(expression_types): 1178 parser = self.EXPRESSION_PARSERS.get(expression_type) 1179 if not parser: 1180 raise TypeError(f"No parser registered for {expression_type}") 1181 1182 try: 1183 return self._parse(parser, raw_tokens, sql) 1184 except ParseError as e: 1185 e.errors[0]["into_expression"] = expression_type 1186 errors.append(e) 1187 1188 raise ParseError( 1189 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1190 errors=merge_errors(errors), 1191 ) from errors[-1] 1192 1193 def _parse( 1194 self, 1195 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1196 raw_tokens: t.List[Token], 1197 sql: t.Optional[str] = None, 1198 ) -> t.List[t.Optional[exp.Expression]]: 1199 self.reset() 1200 self.sql = sql or "" 1201 1202 total = len(raw_tokens) 1203 chunks: t.List[t.List[Token]] = [[]] 1204 1205 for i, token in enumerate(raw_tokens): 1206 if token.token_type == TokenType.SEMICOLON: 1207 if i < total - 1: 1208 chunks.append([]) 1209 else: 1210 chunks[-1].append(token) 1211 1212 expressions = [] 1213 1214 for tokens in chunks: 1215 self._index = -1 1216 self._tokens = tokens 1217 self._advance() 1218 1219 expressions.append(parse_method(self)) 1220 1221 if self._index < len(self._tokens): 1222 self.raise_error("Invalid expression / Unexpected token") 1223 1224 self.check_errors() 1225 1226 return expressions 1227 1228 def check_errors(self) -> None: 1229 """Logs or raises any found errors, depending on the chosen error level setting.""" 1230 if self.error_level == ErrorLevel.WARN: 1231 for error in self.errors: 1232 logger.error(str(error)) 1233 elif self.error_level == ErrorLevel.RAISE and self.errors: 1234 raise ParseError( 1235 concat_messages(self.errors, self.max_errors), 1236 errors=merge_errors(self.errors), 1237 ) 1238 1239 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1240 """ 1241 Appends an error in the list of recorded errors or raises it, depending on the chosen 1242 error level setting. 1243 """ 1244 token = token or self._curr or self._prev or Token.string("") 1245 start = token.start 1246 end = token.end + 1 1247 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1248 highlight = self.sql[start:end] 1249 end_context = self.sql[end : end + self.error_message_context] 1250 1251 error = ParseError.new( 1252 f"{message}. Line {token.line}, Col: {token.col}.\n" 1253 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1254 description=message, 1255 line=token.line, 1256 col=token.col, 1257 start_context=start_context, 1258 highlight=highlight, 1259 end_context=end_context, 1260 ) 1261 1262 if self.error_level == ErrorLevel.IMMEDIATE: 1263 raise error 1264 1265 self.errors.append(error) 1266 1267 def expression( 1268 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1269 ) -> E: 1270 """ 1271 Creates a new, validated Expression. 1272 1273 Args: 1274 exp_class: The expression class to instantiate. 1275 comments: An optional list of comments to attach to the expression. 1276 kwargs: The arguments to set for the expression along with their respective values. 1277 1278 Returns: 1279 The target expression. 
1280 """ 1281 instance = exp_class(**kwargs) 1282 instance.add_comments(comments) if comments else self._add_comments(instance) 1283 return self.validate_expression(instance) 1284 1285 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1286 if expression and self._prev_comments: 1287 expression.add_comments(self._prev_comments) 1288 self._prev_comments = None 1289 1290 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1291 """ 1292 Validates an Expression, making sure that all its mandatory arguments are set. 1293 1294 Args: 1295 expression: The expression to validate. 1296 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1297 1298 Returns: 1299 The validated expression. 1300 """ 1301 if self.error_level != ErrorLevel.IGNORE: 1302 for error_message in expression.error_messages(args): 1303 self.raise_error(error_message) 1304 1305 return expression 1306 1307 def _find_sql(self, start: Token, end: Token) -> str: 1308 return self.sql[start.start : end.end + 1] 1309 1310 def _is_connected(self) -> bool: 1311 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1312 1313 def _advance(self, times: int = 1) -> None: 1314 self._index += times 1315 self._curr = seq_get(self._tokens, self._index) 1316 self._next = seq_get(self._tokens, self._index + 1) 1317 1318 if self._index > 0: 1319 self._prev = self._tokens[self._index - 1] 1320 self._prev_comments = self._prev.comments 1321 else: 1322 self._prev = None 1323 self._prev_comments = None 1324 1325 def _retreat(self, index: int) -> None: 1326 if index != self._index: 1327 self._advance(index - self._index) 1328 1329 def _warn_unsupported(self) -> None: 1330 if len(self._tokens) <= 1: 1331 return 1332 1333 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1334 # interested in emitting a warning for the one being currently processed. 1335 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1336 1337 logger.warning( 1338 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
1339 ) 1340 1341 def _parse_command(self) -> exp.Command: 1342 self._warn_unsupported() 1343 return self.expression( 1344 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1345 ) 1346 1347 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1348 start = self._prev 1349 exists = self._parse_exists() if allow_exists else None 1350 1351 self._match(TokenType.ON) 1352 1353 kind = self._match_set(self.CREATABLES) and self._prev 1354 if not kind: 1355 return self._parse_as_command(start) 1356 1357 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1358 this = self._parse_user_defined_function(kind=kind.token_type) 1359 elif kind.token_type == TokenType.TABLE: 1360 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1361 elif kind.token_type == TokenType.COLUMN: 1362 this = self._parse_column() 1363 else: 1364 this = self._parse_id_var() 1365 1366 self._match(TokenType.IS) 1367 1368 return self.expression( 1369 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1370 ) 1371 1372 def _parse_to_table( 1373 self, 1374 ) -> exp.ToTableProperty: 1375 table = self._parse_table_parts(schema=True) 1376 return self.expression(exp.ToTableProperty, this=table) 1377 1378 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1379 def _parse_ttl(self) -> exp.Expression: 1380 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1381 this = self._parse_bitwise() 1382 1383 if self._match_text_seq("DELETE"): 1384 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1385 if self._match_text_seq("RECOMPRESS"): 1386 return self.expression( 1387 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1388 ) 1389 if self._match_text_seq("TO", "DISK"): 1390 return self.expression( 1391 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1392 ) 1393 if self._match_text_seq("TO", "VOLUME"): 1394 return self.expression( 1395 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1396 ) 1397 1398 return this 1399 1400 expressions = self._parse_csv(_parse_ttl_action) 1401 where = self._parse_where() 1402 group = self._parse_group() 1403 1404 aggregates = None 1405 if group and self._match(TokenType.SET): 1406 aggregates = self._parse_csv(self._parse_set_item) 1407 1408 return self.expression( 1409 exp.MergeTreeTTL, 1410 expressions=expressions, 1411 where=where, 1412 group=group, 1413 aggregates=aggregates, 1414 ) 1415 1416 def _parse_statement(self) -> t.Optional[exp.Expression]: 1417 if self._curr is None: 1418 return None 1419 1420 if self._match_set(self.STATEMENT_PARSERS): 1421 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1422 1423 if self._match_set(Tokenizer.COMMANDS): 1424 return self._parse_command() 1425 1426 expression = self._parse_expression() 1427 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1428 return self._parse_query_modifiers(expression) 1429 1430 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1431 start = self._prev 1432 temporary = self._match(TokenType.TEMPORARY) 1433 materialized = self._match_text_seq("MATERIALIZED") 1434 1435 kind = self._match_set(self.CREATABLES) and self._prev.text 1436 if not kind: 1437 return self._parse_as_command(start) 1438 1439 if_exists = exists or self._parse_exists() 1440 table = self._parse_table_parts( 1441 schema=True, is_db_reference=self._prev.token_type == 

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
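
    # e.g. `DROP TABLE IF EXISTS t` produces exp.Drop with exists=True (sketch):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("DROP TABLE IF EXISTS t").args.get("exists")
    #   True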

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
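
    # e.g. `CREATE SEQUENCE s START WITH 1 INCREMENT BY 2` routes its option keywords
    # through the loop above (sketch):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("CREATE SEQUENCE s START WITH 1 INCREMENT BY 2")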
"default": self._match_text_seq("DEFAULT"), 1658 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1659 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1660 "after": self._match_text_seq("AFTER"), 1661 "minimum": self._match_texts(("MIN", "MINIMUM")), 1662 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1663 } 1664 1665 if self._match_texts(self.PROPERTY_PARSERS): 1666 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1667 try: 1668 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1669 except TypeError: 1670 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1671 1672 return None 1673 1674 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1675 return self._parse_wrapped_csv(self._parse_property) 1676 1677 def _parse_property(self) -> t.Optional[exp.Expression]: 1678 if self._match_texts(self.PROPERTY_PARSERS): 1679 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1680 1681 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1682 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1683 1684 if self._match_text_seq("COMPOUND", "SORTKEY"): 1685 return self._parse_sortkey(compound=True) 1686 1687 if self._match_text_seq("SQL", "SECURITY"): 1688 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1689 1690 index = self._index 1691 key = self._parse_column() 1692 1693 if not self._match(TokenType.EQ): 1694 self._retreat(index) 1695 return self._parse_sequence_properties() 1696 1697 return self.expression( 1698 exp.Property, 1699 this=key.to_dot() if isinstance(key, exp.Column) else key, 1700 value=self._parse_column() or self._parse_var(any_token=True), 1701 ) 1702 1703 def _parse_stored(self) -> exp.FileFormatProperty: 1704 self._match(TokenType.ALIAS) 1705 1706 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1707 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1708 1709 return self.expression( 1710 exp.FileFormatProperty, 1711 this=( 1712 self.expression( 1713 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1714 ) 1715 if input_format or output_format 1716 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1717 ), 1718 ) 1719 1720 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1721 self._match(TokenType.EQ) 1722 self._match(TokenType.ALIAS) 1723 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1724 1725 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1726 properties = [] 1727 while True: 1728 if before: 1729 prop = self._parse_property_before() 1730 else: 1731 prop = self._parse_property() 1732 if not prop: 1733 break 1734 for p in ensure_list(prop): 1735 properties.append(p) 1736 1737 if properties: 1738 return self.expression(exp.Properties, expressions=properties) 1739 1740 return None 1741 1742 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1743 return self.expression( 1744 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1745 ) 1746 1747 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1748 if self._index >= 2: 1749 pre_volatile_token = self._tokens[self._index - 2] 1750 else: 1751 pre_volatile_token = None 1752 1753 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1754 return 
    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )
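
    # Usage sketch (illustrative): per the MySQL grammar linked above,
    # _parse_definer turns the DEFINER clause of a CREATE VIEW into an
    # exp.DefinerProperty whose text is "user@host". For example, in
    #
    #   CREATE DEFINER=admin@localhost VIEW v AS SELECT 1
    #
    # the property's this should be the string "admin@localhost".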
self._match_text_seq("GRANTS"): 1865 self._retreat(self._index - 1) 1866 return None 1867 1868 return self.expression(exp.CopyGrantsProperty) 1869 1870 def _parse_freespace(self) -> exp.FreespaceProperty: 1871 self._match(TokenType.EQ) 1872 return self.expression( 1873 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1874 ) 1875 1876 def _parse_mergeblockratio( 1877 self, no: bool = False, default: bool = False 1878 ) -> exp.MergeBlockRatioProperty: 1879 if self._match(TokenType.EQ): 1880 return self.expression( 1881 exp.MergeBlockRatioProperty, 1882 this=self._parse_number(), 1883 percent=self._match(TokenType.PERCENT), 1884 ) 1885 1886 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1887 1888 def _parse_datablocksize( 1889 self, 1890 default: t.Optional[bool] = None, 1891 minimum: t.Optional[bool] = None, 1892 maximum: t.Optional[bool] = None, 1893 ) -> exp.DataBlocksizeProperty: 1894 self._match(TokenType.EQ) 1895 size = self._parse_number() 1896 1897 units = None 1898 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1899 units = self._prev.text 1900 1901 return self.expression( 1902 exp.DataBlocksizeProperty, 1903 size=size, 1904 units=units, 1905 default=default, 1906 minimum=minimum, 1907 maximum=maximum, 1908 ) 1909 1910 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1911 self._match(TokenType.EQ) 1912 always = self._match_text_seq("ALWAYS") 1913 manual = self._match_text_seq("MANUAL") 1914 never = self._match_text_seq("NEVER") 1915 default = self._match_text_seq("DEFAULT") 1916 1917 autotemp = None 1918 if self._match_text_seq("AUTOTEMP"): 1919 autotemp = self._parse_schema() 1920 1921 return self.expression( 1922 exp.BlockCompressionProperty, 1923 always=always, 1924 manual=manual, 1925 never=never, 1926 default=default, 1927 autotemp=autotemp, 1928 ) 1929 1930 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1931 no = self._match_text_seq("NO") 1932 concurrent = self._match_text_seq("CONCURRENT") 1933 self._match_text_seq("ISOLATED", "LOADING") 1934 for_all = self._match_text_seq("FOR", "ALL") 1935 for_insert = self._match_text_seq("FOR", "INSERT") 1936 for_none = self._match_text_seq("FOR", "NONE") 1937 return self.expression( 1938 exp.IsolatedLoadingProperty, 1939 no=no, 1940 concurrent=concurrent, 1941 for_all=for_all, 1942 for_insert=for_insert, 1943 for_none=for_none, 1944 ) 1945 1946 def _parse_locking(self) -> exp.LockingProperty: 1947 if self._match(TokenType.TABLE): 1948 kind = "TABLE" 1949 elif self._match(TokenType.VIEW): 1950 kind = "VIEW" 1951 elif self._match(TokenType.ROW): 1952 kind = "ROW" 1953 elif self._match_text_seq("DATABASE"): 1954 kind = "DATABASE" 1955 else: 1956 kind = None 1957 1958 if kind in ("DATABASE", "TABLE", "VIEW"): 1959 this = self._parse_table_parts() 1960 else: 1961 this = None 1962 1963 if self._match(TokenType.FOR): 1964 for_or_in = "FOR" 1965 elif self._match(TokenType.IN): 1966 for_or_in = "IN" 1967 else: 1968 for_or_in = None 1969 1970 if self._match_text_seq("ACCESS"): 1971 lock_type = "ACCESS" 1972 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1973 lock_type = "EXCLUSIVE" 1974 elif self._match_text_seq("SHARE"): 1975 lock_type = "SHARE" 1976 elif self._match_text_seq("READ"): 1977 lock_type = "READ" 1978 elif self._match_text_seq("WRITE"): 1979 lock_type = "WRITE" 1980 elif self._match_text_seq("CHECKSUM"): 1981 lock_type = "CHECKSUM" 1982 else: 1983 lock_type = None 1984 1985 override = self._match_text_seq("OVERRIDE") 

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None
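
    # Usage sketch (illustrative; assumes sqlglot.parse_one): the Postgres
    # declarative-partitioning forms accepted by _parse_partition_bound_spec
    # include, e.g.:
    #
    #   CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)
    #   CREATE TABLE p PARTITION OF t FOR VALUES WITH (MODULUS 4, REMAINDER 0)
    #
    # both of which should produce an exp.PartitionedOfProperty wrapping an
    # exp.PartitionBoundSpec when parsed with read="postgres".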
    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )
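
    # Usage sketch (illustrative): _parse_create_like handles the Postgres-style
    # LIKE clause with options, e.g.
    #
    #   CREATE TABLE t (LIKE s INCLUDING DEFAULTS EXCLUDING CONSTRAINTS)
    #
    # which should yield an exp.LikeProperty whose expressions hold one
    # exp.Property per INCLUDING/EXCLUDING option.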
    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_table(schema=True) if not is_function else self._parse_function()

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
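
    # Usage sketch (illustrative; assumes sqlglot.parse_one): both the Postgres
    # and the MySQL upsert spellings funnel through _parse_on_conflict, e.g.
    #
    #   INSERT INTO t (id) VALUES (1) ON CONFLICT (id) DO NOTHING      -- postgres
    #   INSERT INTO t (id) VALUES (1) ON DUPLICATE KEY UPDATE id = 1   -- mysql
    #
    # The first should set conflict_keys; the second sets duplicate=True.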
self._match_text_seq("DELIMITED") 2290 2291 kwargs = {} 2292 2293 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2294 kwargs["fields"] = self._parse_string() 2295 if self._match_text_seq("ESCAPED", "BY"): 2296 kwargs["escaped"] = self._parse_string() 2297 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2298 kwargs["collection_items"] = self._parse_string() 2299 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2300 kwargs["map_keys"] = self._parse_string() 2301 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2302 kwargs["lines"] = self._parse_string() 2303 if self._match_text_seq("NULL", "DEFINED", "AS"): 2304 kwargs["null"] = self._parse_string() 2305 2306 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2307 2308 def _parse_load(self) -> exp.LoadData | exp.Command: 2309 if self._match_text_seq("DATA"): 2310 local = self._match_text_seq("LOCAL") 2311 self._match_text_seq("INPATH") 2312 inpath = self._parse_string() 2313 overwrite = self._match(TokenType.OVERWRITE) 2314 self._match_pair(TokenType.INTO, TokenType.TABLE) 2315 2316 return self.expression( 2317 exp.LoadData, 2318 this=self._parse_table(schema=True), 2319 local=local, 2320 overwrite=overwrite, 2321 inpath=inpath, 2322 partition=self._parse_partition(), 2323 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2324 serde=self._match_text_seq("SERDE") and self._parse_string(), 2325 ) 2326 return self._parse_as_command(self._prev) 2327 2328 def _parse_delete(self) -> exp.Delete: 2329 # This handles MySQL's "Multiple-Table Syntax" 2330 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2331 tables = None 2332 comments = self._prev_comments 2333 if not self._match(TokenType.FROM, advance=False): 2334 tables = self._parse_csv(self._parse_table) or None 2335 2336 returning = self._parse_returning() 2337 2338 return self.expression( 2339 exp.Delete, 2340 comments=comments, 2341 tables=tables, 2342 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2343 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2344 where=self._parse_where(), 2345 returning=returning or self._parse_returning(), 2346 limit=self._parse_limit(), 2347 ) 2348 2349 def _parse_update(self) -> exp.Update: 2350 comments = self._prev_comments 2351 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2352 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2353 returning = self._parse_returning() 2354 return self.expression( 2355 exp.Update, 2356 comments=comments, 2357 **{ # type: ignore 2358 "this": this, 2359 "expressions": expressions, 2360 "from": self._parse_from(joins=True), 2361 "where": self._parse_where(), 2362 "returning": returning or self._parse_returning(), 2363 "order": self._parse_order(), 2364 "limit": self._parse_limit(), 2365 }, 2366 ) 2367 2368 def _parse_uncache(self) -> exp.Uncache: 2369 if not self._match(TokenType.TABLE): 2370 self.raise_error("Expecting TABLE after UNCACHE") 2371 2372 return self.expression( 2373 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2374 ) 2375 2376 def _parse_cache(self) -> exp.Cache: 2377 lazy = self._match_text_seq("LAZY") 2378 self._match(TokenType.TABLE) 2379 table = self._parse_table(schema=True) 2380 2381 options = [] 2382 if self._match_text_seq("OPTIONS"): 2383 self._match_l_paren() 2384 k = self._parse_string() 2385 self._match(TokenType.EQ) 2386 v = self._parse_string() 2387 options = [k, v] 
    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()
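
    # Usage sketch (illustrative; assumes sqlglot.parse_one): _parse_value turns
    # each VALUES row into an exp.Tuple, so
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM (VALUES (1, 2), (3, 4)) AS t(a, b)")
    #
    # should produce an exp.Values with two exp.Tuple expressions and a
    # TableAlias carrying the column names.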
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this
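
    # Usage sketch (illustrative): for dialects with SUPPORTS_IMPLICIT_UNNEST
    # (e.g. BigQuery), a correlated comma join such as
    #
    #   SELECT * FROM t, t.arr
    #
    # is rewritten by _implicit_unnests_to_explicit into the equivalent of
    # SELECT * FROM t, UNNEST(t.arr), because t.arr refers back to the FROM table.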
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
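
    # Usage sketch (illustrative): optimizer hints are comments that the
    # tokenizer surfaces as a HINT token, so a query like
    #
    #   SELECT /*+ PARALLEL(4) */ * FROM t
    #
    # should give an exp.Hint whose expressions contain the PARALLEL(4) call,
    # when parsed with a dialect that tokenizes hints (e.g. read="oracle").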
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
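
    # Usage sketch (illustrative; assumes sqlglot.parse_one): the side/kind/method
    # tokens collected above end up as string args on exp.Join, e.g.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM a LEFT OUTER JOIN b ON a.id = b.id")
    #
    # should yield a join with side="LEFT", kind="OUTER" and an "on" condition.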
    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
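
    # Usage sketch (illustrative): _parse_table_parts assigns dotted name parts
    # right-to-left, so "c.d.t" parses with catalog=c, db=d and this=t, deeper
    # paths nest into exp.Dot, and the T-SQL "a..b" form leaves db as an empty
    # string placeholder.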
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
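
    # Usage sketch (illustrative): per the DuckDB docs linked above,
    # _parse_simplified_pivot covers the keyword-argument form, e.g.
    #
    #   PIVOT cities ON year USING SUM(population) GROUP BY country
    #
    # which should parse into an exp.Pivot with this=cities, expressions=[year],
    # using=[SUM(population)] and a GROUP BY, when read="duckdb".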
    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )
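
    # Usage sketch (illustrative): for a standard pivot such as
    #
    #   SELECT * FROM t PIVOT(SUM(x) AS s FOR y IN ('a', 'b'))
    #
    # _parse_pivot records the aggregations and the IN list, then synthesizes
    # the output column names (e.g. "a_s" vs "s_a", depending on the dialect's
    # PREFIXED_PIVOT_COLUMNS setting).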
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
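
    # Usage sketch (illustrative; assumes sqlglot.parse_one): the MySQL shorthand
    # "LIMIT 10, 5" is normalized here with 10 stored as the Limit's offset arg,
    # ANSI "FETCH FIRST 5 ROWS ONLY" becomes an exp.Fetch, and ClickHouse
    # "LIMIT 1 BY x" lands in the Limit's expressions via _parse_limit_by.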
[] 3625 while True: 3626 if self._match_text_seq("FOR", "UPDATE"): 3627 update = True 3628 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3629 "LOCK", "IN", "SHARE", "MODE" 3630 ): 3631 update = False 3632 else: 3633 break 3634 3635 expressions = None 3636 if self._match_text_seq("OF"): 3637 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3638 3639 wait: t.Optional[bool | exp.Expression] = None 3640 if self._match_text_seq("NOWAIT"): 3641 wait = True 3642 elif self._match_text_seq("WAIT"): 3643 wait = self._parse_primary() 3644 elif self._match_text_seq("SKIP", "LOCKED"): 3645 wait = False 3646 3647 locks.append( 3648 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3649 ) 3650 3651 return locks 3652 3653 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3654 while this and self._match_set(self.SET_OPERATIONS): 3655 token_type = self._prev.token_type 3656 3657 if token_type == TokenType.UNION: 3658 operation = exp.Union 3659 elif token_type == TokenType.EXCEPT: 3660 operation = exp.Except 3661 else: 3662 operation = exp.Intersect 3663 3664 comments = self._prev.comments 3665 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3666 by_name = self._match_text_seq("BY", "NAME") 3667 expression = self._parse_select(nested=True, parse_set_operation=False) 3668 3669 this = self.expression( 3670 operation, 3671 comments=comments, 3672 this=this, 3673 distinct=distinct, 3674 by_name=by_name, 3675 expression=expression, 3676 ) 3677 3678 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3679 expression = this.expression 3680 3681 if expression: 3682 for arg in self.UNION_MODIFIERS: 3683 expr = expression.args.get(arg) 3684 if expr: 3685 this.set(arg, expr.pop()) 3686 3687 return this 3688 3689 def _parse_expression(self) -> t.Optional[exp.Expression]: 3690 return self._parse_alias(self._parse_conjunction()) 3691 3692 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3693 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3694 3695 def _parse_equality(self) -> t.Optional[exp.Expression]: 3696 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3697 3698 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3699 return self._parse_tokens(self._parse_range, self.COMPARISON) 3700 3701 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3702 this = this or self._parse_bitwise() 3703 negate = self._match(TokenType.NOT) 3704 3705 if self._match_set(self.RANGE_PARSERS): 3706 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3707 if not expression: 3708 return this 3709 3710 this = expression 3711 elif self._match(TokenType.ISNULL): 3712 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3713 3714 # Postgres supports ISNULL and NOTNULL for conditions. 
3715 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3716 if self._match(TokenType.NOTNULL): 3717 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3718 this = self.expression(exp.Not, this=this) 3719 3720 if negate: 3721 this = self.expression(exp.Not, this=this) 3722 3723 if self._match(TokenType.IS): 3724 this = self._parse_is(this) 3725 3726 return this 3727 3728 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3729 index = self._index - 1 3730 negate = self._match(TokenType.NOT) 3731 3732 if self._match_text_seq("DISTINCT", "FROM"): 3733 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3734 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3735 3736 expression = self._parse_null() or self._parse_boolean() 3737 if not expression: 3738 self._retreat(index) 3739 return None 3740 3741 this = self.expression(exp.Is, this=this, expression=expression) 3742 return self.expression(exp.Not, this=this) if negate else this 3743 3744 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3745 unnest = self._parse_unnest(with_alias=False) 3746 if unnest: 3747 this = self.expression(exp.In, this=this, unnest=unnest) 3748 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3749 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3750 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3751 3752 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3753 this = self.expression(exp.In, this=this, query=expressions[0]) 3754 else: 3755 this = self.expression(exp.In, this=this, expressions=expressions) 3756 3757 if matched_l_paren: 3758 self._match_r_paren(this) 3759 elif not self._match(TokenType.R_BRACKET, expression=this): 3760 self.raise_error("Expecting ]") 3761 else: 3762 this = self.expression(exp.In, this=this, field=self._parse_field()) 3763 3764 return this 3765 3766 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3767 low = self._parse_bitwise() 3768 self._match(TokenType.AND) 3769 high = self._parse_bitwise() 3770 return self.expression(exp.Between, this=this, low=low, high=high) 3771 3772 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3773 if not self._match(TokenType.ESCAPE): 3774 return this 3775 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3776 3777 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3778 index = self._index 3779 3780 if not self._match(TokenType.INTERVAL) and match_interval: 3781 return None 3782 3783 if self._match(TokenType.STRING, advance=False): 3784 this = self._parse_primary() 3785 else: 3786 this = self._parse_term() 3787 3788 if not this or ( 3789 isinstance(this, exp.Column) 3790 and not this.table 3791 and not this.this.quoted 3792 and this.name.upper() == "IS" 3793 ): 3794 self._retreat(index) 3795 return None 3796 3797 unit = self._parse_function() or ( 3798 not self._match(TokenType.ALIAS, advance=False) 3799 and self._parse_var(any_token=True, upper=True) 3800 ) 3801 3802 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3803 # each INTERVAL expression into this canonical form so it's easy to transpile 3804 if this and this.is_number: 3805 this = exp.Literal.string(this.name) 3806 elif this and this.is_string: 3807 parts = this.name.split() 3808 3809 if len(parts) == 2: 3810 if unit: 3811 # This is not 
actually a unit, it's something else (e.g. a "window side") 3812 unit = None 3813 self._retreat(self._index - 1) 3814 3815 this = exp.Literal.string(parts[0]) 3816 unit = self.expression(exp.Var, this=parts[1].upper()) 3817 3818 return self.expression(exp.Interval, this=this, unit=unit) 3819 3820 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3821 this = self._parse_term() 3822 3823 while True: 3824 if self._match_set(self.BITWISE): 3825 this = self.expression( 3826 self.BITWISE[self._prev.token_type], 3827 this=this, 3828 expression=self._parse_term(), 3829 ) 3830 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3831 this = self.expression( 3832 exp.DPipe, 3833 this=this, 3834 expression=self._parse_term(), 3835 safe=not self.dialect.STRICT_STRING_CONCAT, 3836 ) 3837 elif self._match(TokenType.DQMARK): 3838 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3839 elif self._match_pair(TokenType.LT, TokenType.LT): 3840 this = self.expression( 3841 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3842 ) 3843 elif self._match_pair(TokenType.GT, TokenType.GT): 3844 this = self.expression( 3845 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3846 ) 3847 else: 3848 break 3849 3850 return this 3851 3852 def _parse_term(self) -> t.Optional[exp.Expression]: 3853 return self._parse_tokens(self._parse_factor, self.TERM) 3854 3855 def _parse_factor(self) -> t.Optional[exp.Expression]: 3856 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3857 this = parse_method() 3858 3859 while self._match_set(self.FACTOR): 3860 this = self.expression( 3861 self.FACTOR[self._prev.token_type], 3862 this=this, 3863 comments=self._prev_comments, 3864 expression=parse_method(), 3865 ) 3866 if isinstance(this, exp.Div): 3867 this.args["typed"] = self.dialect.TYPED_DIVISION 3868 this.args["safe"] = self.dialect.SAFE_DIVISION 3869 3870 return this 3871 3872 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3873 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3874 3875 def _parse_unary(self) -> t.Optional[exp.Expression]: 3876 if self._match_set(self.UNARY_PARSERS): 3877 return self.UNARY_PARSERS[self._prev.token_type](self) 3878 return self._parse_at_time_zone(self._parse_type()) 3879 3880 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3881 interval = parse_interval and self._parse_interval() 3882 if interval: 3883 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 3884 while True: 3885 index = self._index 3886 self._match(TokenType.PLUS) 3887 3888 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3889 self._retreat(index) 3890 break 3891 3892 interval = self.expression( # type: ignore 3893 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3894 ) 3895 3896 return interval 3897 3898 index = self._index 3899 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3900 this = self._parse_column() 3901 3902 if data_type: 3903 if isinstance(this, exp.Literal): 3904 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3905 if parser: 3906 return parser(self, this, data_type) 3907 return self.expression(exp.Cast, this=this, to=data_type) 3908 if not data_type.expressions: 3909 self._retreat(index) 3910 return self._parse_column() 3911 return self._parse_column_ops(data_type) 3912 3913 return this and self._parse_column_ops(this) 3914 3915 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3916 this = self._parse_type() 3917 if not this: 3918 return None 3919 3920 return self.expression( 3921 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3922 ) 3923 3924 def _parse_types( 3925 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3926 ) -> t.Optional[exp.Expression]: 3927 index = self._index 3928 3929 prefix = self._match_text_seq("SYSUDTLIB", ".") 3930 3931 if not self._match_set(self.TYPE_TOKENS): 3932 identifier = allow_identifiers and self._parse_id_var( 3933 any_token=False, tokens=(TokenType.VAR,) 3934 ) 3935 if identifier: 3936 tokens = self.dialect.tokenize(identifier.name) 3937 3938 if len(tokens) != 1: 3939 self.raise_error("Unexpected identifier", self._prev) 3940 3941 if tokens[0].token_type in self.TYPE_TOKENS: 3942 self._prev = tokens[0] 3943 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3944 type_name = identifier.name 3945 3946 while self._match(TokenType.DOT): 3947 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3948 3949 return exp.DataType.build(type_name, udt=True) 3950 else: 3951 self._retreat(self._index - 1) 3952 return None 3953 else: 3954 return None 3955 3956 type_token = self._prev.token_type 3957 3958 if type_token == TokenType.PSEUDO_TYPE: 3959 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3960 3961 if type_token == TokenType.OBJECT_IDENTIFIER: 3962 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3963 3964 nested = type_token in self.NESTED_TYPE_TOKENS 3965 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3966 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3967 expressions = None 3968 maybe_func = False 3969 3970 if self._match(TokenType.L_PAREN): 3971 if is_struct: 3972 expressions = self._parse_csv(self._parse_struct_types) 3973 elif nested: 3974 expressions = self._parse_csv( 3975 lambda: self._parse_types( 3976 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3977 ) 3978 ) 3979 elif type_token in self.ENUM_TYPE_TOKENS: 3980 expressions = self._parse_csv(self._parse_equality) 3981 elif is_aggregate: 3982 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3983 any_token=False, tokens=(TokenType.VAR,) 3984 ) 3985 if not func_or_ident or not self._match(TokenType.COMMA): 3986 return None 3987 expressions = self._parse_csv( 3988 lambda: self._parse_types( 3989 check_func=check_func, schema=schema, 
allow_identifiers=allow_identifiers 3990 ) 3991 ) 3992 expressions.insert(0, func_or_ident) 3993 else: 3994 expressions = self._parse_csv(self._parse_type_size) 3995 3996 if not expressions or not self._match(TokenType.R_PAREN): 3997 self._retreat(index) 3998 return None 3999 4000 maybe_func = True 4001 4002 this: t.Optional[exp.Expression] = None 4003 values: t.Optional[t.List[exp.Expression]] = None 4004 4005 if nested and self._match(TokenType.LT): 4006 if is_struct: 4007 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4008 else: 4009 expressions = self._parse_csv( 4010 lambda: self._parse_types( 4011 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4012 ) 4013 ) 4014 4015 if not self._match(TokenType.GT): 4016 self.raise_error("Expecting >") 4017 4018 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4019 values = self._parse_csv(self._parse_conjunction) 4020 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4021 4022 if type_token in self.TIMESTAMPS: 4023 if self._match_text_seq("WITH", "TIME", "ZONE"): 4024 maybe_func = False 4025 tz_type = ( 4026 exp.DataType.Type.TIMETZ 4027 if type_token in self.TIMES 4028 else exp.DataType.Type.TIMESTAMPTZ 4029 ) 4030 this = exp.DataType(this=tz_type, expressions=expressions) 4031 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4032 maybe_func = False 4033 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4034 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4035 maybe_func = False 4036 elif type_token == TokenType.INTERVAL: 4037 unit = self._parse_var() 4038 4039 if self._match_text_seq("TO"): 4040 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 4041 else: 4042 span = None 4043 4044 if span or not unit: 4045 this = self.expression( 4046 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 4047 ) 4048 else: 4049 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4050 4051 if maybe_func and check_func: 4052 index2 = self._index 4053 peek = self._parse_string() 4054 4055 if not peek: 4056 self._retreat(index) 4057 return None 4058 4059 self._retreat(index2) 4060 4061 if not this: 4062 if self._match_text_seq("UNSIGNED"): 4063 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4064 if not unsigned_type_token: 4065 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4066 4067 type_token = unsigned_type_token or type_token 4068 4069 this = exp.DataType( 4070 this=exp.DataType.Type[type_token.value], 4071 expressions=expressions, 4072 nested=nested, 4073 values=values, 4074 prefix=prefix, 4075 ) 4076 4077 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4078 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4079 4080 return this 4081 4082 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4083 index = self._index 4084 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4085 self._match(TokenType.COLON) 4086 column_def = self._parse_column_def(this) 4087 4088 if type_required and ( 4089 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4090 ): 4091 self._retreat(index) 4092 return self._parse_types() 4093 4094 return column_def 4095 4096 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4097 if not self._match_text_seq("AT", "TIME", "ZONE"): 4098 
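            # No AT TIME ZONE suffix follows, so the expression passes through unchanged.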
return this 4099 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4100 4101 def _parse_column(self) -> t.Optional[exp.Expression]: 4102 this = self._parse_column_reference() 4103 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4104 4105 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4106 this = self._parse_field() 4107 if ( 4108 not this 4109 and self._match(TokenType.VALUES, advance=False) 4110 and self.VALUES_FOLLOWED_BY_PAREN 4111 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4112 ): 4113 this = self._parse_id_var() 4114 4115 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4116 4117 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4118 this = self._parse_bracket(this) 4119 4120 while self._match_set(self.COLUMN_OPERATORS): 4121 op_token = self._prev.token_type 4122 op = self.COLUMN_OPERATORS.get(op_token) 4123 4124 if op_token == TokenType.DCOLON: 4125 field = self._parse_types() 4126 if not field: 4127 self.raise_error("Expected type") 4128 elif op and self._curr: 4129 field = self._parse_column_reference() 4130 else: 4131 field = self._parse_field(anonymous_func=True, any_token=True) 4132 4133 if isinstance(field, exp.Func) and this: 4134 # bigquery allows function calls like x.y.count(...) 4135 # SAFE.SUBSTR(...) 4136 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4137 this = exp.replace_tree( 4138 this, 4139 lambda n: ( 4140 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4141 if n.table 4142 else n.this 4143 ) 4144 if isinstance(n, exp.Column) 4145 else n, 4146 ) 4147 4148 if op: 4149 this = op(self, this, field) 4150 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4151 this = self.expression( 4152 exp.Column, 4153 this=field, 4154 table=this.this, 4155 db=this.args.get("table"), 4156 catalog=this.args.get("db"), 4157 ) 4158 else: 4159 this = self.expression(exp.Dot, this=this, expression=field) 4160 this = self._parse_bracket(this) 4161 return this 4162 4163 def _parse_primary(self) -> t.Optional[exp.Expression]: 4164 if self._match_set(self.PRIMARY_PARSERS): 4165 token_type = self._prev.token_type 4166 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4167 4168 if token_type == TokenType.STRING: 4169 expressions = [primary] 4170 while self._match(TokenType.STRING): 4171 expressions.append(exp.Literal.string(self._prev.text)) 4172 4173 if len(expressions) > 1: 4174 return self.expression(exp.Concat, expressions=expressions) 4175 4176 return primary 4177 4178 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4179 return exp.Literal.number(f"0.{self._prev.text}") 4180 4181 if self._match(TokenType.L_PAREN): 4182 comments = self._prev_comments 4183 query = self._parse_select() 4184 4185 if query: 4186 expressions = [query] 4187 else: 4188 expressions = self._parse_expressions() 4189 4190 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4191 4192 if isinstance(this, exp.UNWRAPPED_QUERIES): 4193 this = self._parse_set_operations( 4194 self._parse_subquery(this=this, parse_alias=False) 4195 ) 4196 elif isinstance(this, exp.Subquery): 4197 this = self._parse_subquery( 4198 this=self._parse_set_operations(this), parse_alias=False 4199 ) 4200 elif len(expressions) > 1: 4201 this = self.expression(exp.Tuple, expressions=expressions) 4202 else: 4203 this = self.expression(exp.Paren, 
this=this) 4204 4205 if this: 4206 this.add_comments(comments) 4207 4208 self._match_r_paren(expression=this) 4209 return this 4210 4211 return None 4212 4213 def _parse_field( 4214 self, 4215 any_token: bool = False, 4216 tokens: t.Optional[t.Collection[TokenType]] = None, 4217 anonymous_func: bool = False, 4218 ) -> t.Optional[exp.Expression]: 4219 return ( 4220 self._parse_primary() 4221 or self._parse_function(anonymous=anonymous_func) 4222 or self._parse_id_var(any_token=any_token, tokens=tokens) 4223 ) 4224 4225 def _parse_function( 4226 self, 4227 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4228 anonymous: bool = False, 4229 optional_parens: bool = True, 4230 ) -> t.Optional[exp.Expression]: 4231 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4232 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4233 fn_syntax = False 4234 if ( 4235 self._match(TokenType.L_BRACE, advance=False) 4236 and self._next 4237 and self._next.text.upper() == "FN" 4238 ): 4239 self._advance(2) 4240 fn_syntax = True 4241 4242 func = self._parse_function_call( 4243 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4244 ) 4245 4246 if fn_syntax: 4247 self._match(TokenType.R_BRACE) 4248 4249 return func 4250 4251 def _parse_function_call( 4252 self, 4253 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4254 anonymous: bool = False, 4255 optional_parens: bool = True, 4256 ) -> t.Optional[exp.Expression]: 4257 if not self._curr: 4258 return None 4259 4260 comments = self._curr.comments 4261 token_type = self._curr.token_type 4262 this = self._curr.text 4263 upper = this.upper() 4264 4265 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4266 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4267 self._advance() 4268 return self._parse_window(parser(self)) 4269 4270 if not self._next or self._next.token_type != TokenType.L_PAREN: 4271 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4272 self._advance() 4273 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4274 4275 return None 4276 4277 if token_type not in self.FUNC_TOKENS: 4278 return None 4279 4280 self._advance(2) 4281 4282 parser = self.FUNCTION_PARSERS.get(upper) 4283 if parser and not anonymous: 4284 this = parser(self) 4285 else: 4286 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4287 4288 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4289 this = self.expression(subquery_predicate, this=self._parse_select()) 4290 self._match_r_paren() 4291 return this 4292 4293 if functions is None: 4294 functions = self.FUNCTIONS 4295 4296 function = functions.get(upper) 4297 4298 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4299 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4300 4301 if alias: 4302 args = self._kv_to_prop_eq(args) 4303 4304 if function and not anonymous: 4305 if "dialect" in function.__code__.co_varnames: 4306 func = function(args, dialect=self.dialect) 4307 else: 4308 func = function(args) 4309 4310 func = self.validate_expression(func, args) 4311 if not self.dialect.NORMALIZE_FUNCTIONS: 4312 func.meta["name"] = this 4313 4314 this = func 4315 else: 4316 if token_type == TokenType.IDENTIFIER: 4317 this = exp.Identifier(this=this, quoted=True) 4318 this = self.expression(exp.Anonymous, this=this, expressions=args) 4319 4320 if isinstance(this, exp.Expression): 4321 this.add_comments(comments) 4322 4323 
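        # At this point the argument list has been consumed: the closing paren is
        # matched next, and _parse_window gets a chance to attach trailing FILTER (...),
        # WITHIN GROUP (...) or OVER (...) syntax to the call.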
self._match_r_paren(this) 4324 return self._parse_window(this) 4325 4326 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4327 transformed = [] 4328 4329 for e in expressions: 4330 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4331 if isinstance(e, exp.Alias): 4332 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4333 4334 if not isinstance(e, exp.PropertyEQ): 4335 e = self.expression( 4336 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4337 ) 4338 4339 if isinstance(e.this, exp.Column): 4340 e.this.replace(e.this.this) 4341 4342 transformed.append(e) 4343 4344 return transformed 4345 4346 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4347 return self._parse_column_def(self._parse_id_var()) 4348 4349 def _parse_user_defined_function( 4350 self, kind: t.Optional[TokenType] = None 4351 ) -> t.Optional[exp.Expression]: 4352 this = self._parse_id_var() 4353 4354 while self._match(TokenType.DOT): 4355 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4356 4357 if not self._match(TokenType.L_PAREN): 4358 return this 4359 4360 expressions = self._parse_csv(self._parse_function_parameter) 4361 self._match_r_paren() 4362 return self.expression( 4363 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4364 ) 4365 4366 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4367 literal = self._parse_primary() 4368 if literal: 4369 return self.expression(exp.Introducer, this=token.text, expression=literal) 4370 4371 return self.expression(exp.Identifier, this=token.text) 4372 4373 def _parse_session_parameter(self) -> exp.SessionParameter: 4374 kind = None 4375 this = self._parse_id_var() or self._parse_primary() 4376 4377 if this and self._match(TokenType.DOT): 4378 kind = this.name 4379 this = self._parse_var() or self._parse_primary() 4380 4381 return self.expression(exp.SessionParameter, this=this, kind=kind) 4382 4383 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4384 index = self._index 4385 4386 if self._match(TokenType.L_PAREN): 4387 expressions = t.cast( 4388 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4389 ) 4390 4391 if not self._match(TokenType.R_PAREN): 4392 self._retreat(index) 4393 else: 4394 expressions = [self._parse_id_var()] 4395 4396 if self._match_set(self.LAMBDAS): 4397 return self.LAMBDAS[self._prev.token_type](self, expressions) 4398 4399 self._retreat(index) 4400 4401 this: t.Optional[exp.Expression] 4402 4403 if self._match(TokenType.DISTINCT): 4404 this = self.expression( 4405 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4406 ) 4407 else: 4408 this = self._parse_select_or_expression(alias=alias) 4409 4410 return self._parse_limit( 4411 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4412 ) 4413 4414 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4415 index = self._index 4416 4417 if not self.errors: 4418 try: 4419 if self._parse_select(nested=True): 4420 return this 4421 except ParseError: 4422 pass 4423 finally: 4424 self.errors.clear() 4425 self._retreat(index) 4426 4427 if not self._match(TokenType.L_PAREN): 4428 return this 4429 4430 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4431 4432 self._match_r_paren() 4433 return self.expression(exp.Schema, this=this, expressions=args) 
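    # Illustrative sketch (not part of the original source): _parse_schema is the entry
    # point for wrapped column/constraint lists such as a CREATE TABLE body. Roughly:
    #
    #   >>> import sqlglot
    #   >>> create = sqlglot.parse_one("CREATE TABLE t (a INT, PRIMARY KEY (a))")
    #   >>> schema = create.this  # the exp.Schema wrapping the table and its defs
    #   >>> [type(e).__name__ for e in schema.expressions]
    #   ['ColumnDef', 'PrimaryKey']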
4434 4435 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4436 return self._parse_column_def(self._parse_field(any_token=True)) 4437 4438 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4439 # column defs are not really columns, they're identifiers 4440 if isinstance(this, exp.Column): 4441 this = this.this 4442 4443 kind = self._parse_types(schema=True) 4444 4445 if self._match_text_seq("FOR", "ORDINALITY"): 4446 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4447 4448 constraints: t.List[exp.Expression] = [] 4449 4450 if not kind and self._match(TokenType.ALIAS): 4451 constraints.append( 4452 self.expression( 4453 exp.ComputedColumnConstraint, 4454 this=self._parse_conjunction(), 4455 persisted=self._match_text_seq("PERSISTED"), 4456 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4457 ) 4458 ) 4459 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4460 self._match(TokenType.ALIAS) 4461 constraints.append( 4462 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4463 ) 4464 4465 while True: 4466 constraint = self._parse_column_constraint() 4467 if not constraint: 4468 break 4469 constraints.append(constraint) 4470 4471 if not kind and not constraints: 4472 return this 4473 4474 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4475 4476 def _parse_auto_increment( 4477 self, 4478 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4479 start = None 4480 increment = None 4481 4482 if self._match(TokenType.L_PAREN, advance=False): 4483 args = self._parse_wrapped_csv(self._parse_bitwise) 4484 start = seq_get(args, 0) 4485 increment = seq_get(args, 1) 4486 elif self._match_text_seq("START"): 4487 start = self._parse_bitwise() 4488 self._match_text_seq("INCREMENT") 4489 increment = self._parse_bitwise() 4490 4491 if start and increment: 4492 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4493 4494 return exp.AutoIncrementColumnConstraint() 4495 4496 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4497 if not self._match_text_seq("REFRESH"): 4498 self._retreat(self._index - 1) 4499 return None 4500 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4501 4502 def _parse_compress(self) -> exp.CompressColumnConstraint: 4503 if self._match(TokenType.L_PAREN, advance=False): 4504 return self.expression( 4505 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4506 ) 4507 4508 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4509 4510 def _parse_generated_as_identity( 4511 self, 4512 ) -> ( 4513 exp.GeneratedAsIdentityColumnConstraint 4514 | exp.ComputedColumnConstraint 4515 | exp.GeneratedAsRowColumnConstraint 4516 ): 4517 if self._match_text_seq("BY", "DEFAULT"): 4518 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4519 this = self.expression( 4520 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4521 ) 4522 else: 4523 self._match_text_seq("ALWAYS") 4524 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4525 4526 self._match(TokenType.ALIAS) 4527 4528 if self._match_text_seq("ROW"): 4529 start = self._match_text_seq("START") 4530 if not start: 4531 self._match(TokenType.END) 4532 hidden = self._match_text_seq("HIDDEN") 4533 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, 
hidden=hidden) 4534 4535 identity = self._match_text_seq("IDENTITY") 4536 4537 if self._match(TokenType.L_PAREN): 4538 if self._match(TokenType.START_WITH): 4539 this.set("start", self._parse_bitwise()) 4540 if self._match_text_seq("INCREMENT", "BY"): 4541 this.set("increment", self._parse_bitwise()) 4542 if self._match_text_seq("MINVALUE"): 4543 this.set("minvalue", self._parse_bitwise()) 4544 if self._match_text_seq("MAXVALUE"): 4545 this.set("maxvalue", self._parse_bitwise()) 4546 4547 if self._match_text_seq("CYCLE"): 4548 this.set("cycle", True) 4549 elif self._match_text_seq("NO", "CYCLE"): 4550 this.set("cycle", False) 4551 4552 if not identity: 4553 this.set("expression", self._parse_bitwise()) 4554 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4555 args = self._parse_csv(self._parse_bitwise) 4556 this.set("start", seq_get(args, 0)) 4557 this.set("increment", seq_get(args, 1)) 4558 4559 self._match_r_paren() 4560 4561 return this 4562 4563 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4564 self._match_text_seq("LENGTH") 4565 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4566 4567 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4568 if self._match_text_seq("NULL"): 4569 return self.expression(exp.NotNullColumnConstraint) 4570 if self._match_text_seq("CASESPECIFIC"): 4571 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4572 if self._match_text_seq("FOR", "REPLICATION"): 4573 return self.expression(exp.NotForReplicationColumnConstraint) 4574 return None 4575 4576 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4577 if self._match(TokenType.CONSTRAINT): 4578 this = self._parse_id_var() 4579 else: 4580 this = None 4581 4582 if self._match_texts(self.CONSTRAINT_PARSERS): 4583 return self.expression( 4584 exp.ColumnConstraint, 4585 this=this, 4586 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4587 ) 4588 4589 return this 4590 4591 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4592 if not self._match(TokenType.CONSTRAINT): 4593 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4594 4595 return self.expression( 4596 exp.Constraint, 4597 this=self._parse_id_var(), 4598 expressions=self._parse_unnamed_constraints(), 4599 ) 4600 4601 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4602 constraints = [] 4603 while True: 4604 constraint = self._parse_unnamed_constraint() or self._parse_function() 4605 if not constraint: 4606 break 4607 constraints.append(constraint) 4608 4609 return constraints 4610 4611 def _parse_unnamed_constraint( 4612 self, constraints: t.Optional[t.Collection[str]] = None 4613 ) -> t.Optional[exp.Expression]: 4614 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4615 constraints or self.CONSTRAINT_PARSERS 4616 ): 4617 return None 4618 4619 constraint = self._prev.text.upper() 4620 if constraint not in self.CONSTRAINT_PARSERS: 4621 self.raise_error(f"No parser found for schema constraint {constraint}.") 4622 4623 return self.CONSTRAINT_PARSERS[constraint](self) 4624 4625 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4626 self._match_text_seq("KEY") 4627 return self.expression( 4628 exp.UniqueColumnConstraint, 4629 this=self._parse_schema(self._parse_id_var(any_token=False)), 4630 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4631 on_conflict=self._parse_on_conflict(), 4632 ) 4633 
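    # Illustrative sketch (not part of the original source): the option strings built by
    # _parse_key_constraint_options below surface on the parent constraint, e.g. roughly:
    #
    #   >>> import sqlglot
    #   >>> sql = "CREATE TABLE t (a INT REFERENCES u (id) ON DELETE CASCADE)"
    #   >>> ref = sqlglot.parse_one(sql).find(sqlglot.exp.Reference)
    #   >>> ref.args["options"]
    #   ['ON DELETE CASCADE']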
4634 def _parse_key_constraint_options(self) -> t.List[str]: 4635 options = [] 4636 while True: 4637 if not self._curr: 4638 break 4639 4640 if self._match(TokenType.ON): 4641 action = None 4642 on = self._advance_any() and self._prev.text 4643 4644 if self._match_text_seq("NO", "ACTION"): 4645 action = "NO ACTION" 4646 elif self._match_text_seq("CASCADE"): 4647 action = "CASCADE" 4648 elif self._match_text_seq("RESTRICT"): 4649 action = "RESTRICT" 4650 elif self._match_pair(TokenType.SET, TokenType.NULL): 4651 action = "SET NULL" 4652 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4653 action = "SET DEFAULT" 4654 else: 4655 self.raise_error("Invalid key constraint") 4656 4657 options.append(f"ON {on} {action}") 4658 elif self._match_text_seq("NOT", "ENFORCED"): 4659 options.append("NOT ENFORCED") 4660 elif self._match_text_seq("DEFERRABLE"): 4661 options.append("DEFERRABLE") 4662 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4663 options.append("INITIALLY DEFERRED") 4664 elif self._match_text_seq("NORELY"): 4665 options.append("NORELY") 4666 elif self._match_text_seq("MATCH", "FULL"): 4667 options.append("MATCH FULL") 4668 else: 4669 break 4670 4671 return options 4672 4673 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4674 if match and not self._match(TokenType.REFERENCES): 4675 return None 4676 4677 expressions = None 4678 this = self._parse_table(schema=True) 4679 options = self._parse_key_constraint_options() 4680 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4681 4682 def _parse_foreign_key(self) -> exp.ForeignKey: 4683 expressions = self._parse_wrapped_id_vars() 4684 reference = self._parse_references() 4685 options = {} 4686 4687 while self._match(TokenType.ON): 4688 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4689 self.raise_error("Expected DELETE or UPDATE") 4690 4691 kind = self._prev.text.lower() 4692 4693 if self._match_text_seq("NO", "ACTION"): 4694 action = "NO ACTION" 4695 elif self._match(TokenType.SET): 4696 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4697 action = "SET " + self._prev.text.upper() 4698 else: 4699 self._advance() 4700 action = self._prev.text.upper() 4701 4702 options[kind] = action 4703 4704 return self.expression( 4705 exp.ForeignKey, 4706 expressions=expressions, 4707 reference=reference, 4708 **options, # type: ignore 4709 ) 4710 4711 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4712 return self._parse_field() 4713 4714 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4715 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4716 self._retreat(self._index - 1) 4717 return None 4718 4719 id_vars = self._parse_wrapped_id_vars() 4720 return self.expression( 4721 exp.PeriodForSystemTimeConstraint, 4722 this=seq_get(id_vars, 0), 4723 expression=seq_get(id_vars, 1), 4724 ) 4725 4726 def _parse_primary_key( 4727 self, wrapped_optional: bool = False, in_props: bool = False 4728 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4729 desc = ( 4730 self._match_set((TokenType.ASC, TokenType.DESC)) 4731 and self._prev.token_type == TokenType.DESC 4732 ) 4733 4734 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4735 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4736 4737 expressions = self._parse_wrapped_csv( 4738 self._parse_primary_key_part, optional=wrapped_optional 4739 ) 4740 options = self._parse_key_constraint_options() 4741 return 
self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4742 4743 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4744 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4745 4746 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4747 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4748 return this 4749 4750 bracket_kind = self._prev.token_type 4751 expressions = self._parse_csv( 4752 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4753 ) 4754 4755 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4756 self.raise_error("Expected ]") 4757 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4758 self.raise_error("Expected }") 4759 4760 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4761 if bracket_kind == TokenType.L_BRACE: 4762 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4763 elif not this or this.name.upper() == "ARRAY": 4764 this = self.expression(exp.Array, expressions=expressions) 4765 else: 4766 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4767 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4768 4769 self._add_comments(this) 4770 return self._parse_bracket(this) 4771 4772 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4773 if self._match(TokenType.COLON): 4774 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4775 return this 4776 4777 def _parse_case(self) -> t.Optional[exp.Expression]: 4778 ifs = [] 4779 default = None 4780 4781 comments = self._prev_comments 4782 expression = self._parse_conjunction() 4783 4784 while self._match(TokenType.WHEN): 4785 this = self._parse_conjunction() 4786 self._match(TokenType.THEN) 4787 then = self._parse_conjunction() 4788 ifs.append(self.expression(exp.If, this=this, true=then)) 4789 4790 if self._match(TokenType.ELSE): 4791 default = self._parse_conjunction() 4792 4793 if not self._match(TokenType.END): 4794 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4795 default = exp.column("interval") 4796 else: 4797 self.raise_error("Expected END after CASE", self._prev) 4798 4799 return self.expression( 4800 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4801 ) 4802 4803 def _parse_if(self) -> t.Optional[exp.Expression]: 4804 if self._match(TokenType.L_PAREN): 4805 args = self._parse_csv(self._parse_conjunction) 4806 this = self.validate_expression(exp.If.from_arg_list(args), args) 4807 self._match_r_paren() 4808 else: 4809 index = self._index - 1 4810 4811 if self.NO_PAREN_IF_COMMANDS and index == 0: 4812 return self._parse_as_command(self._prev) 4813 4814 condition = self._parse_conjunction() 4815 4816 if not condition: 4817 self._retreat(index) 4818 return None 4819 4820 self._match(TokenType.THEN) 4821 true = self._parse_conjunction() 4822 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4823 self._match(TokenType.END) 4824 this = self.expression(exp.If, this=condition, true=true, false=false) 4825 4826 return this 4827 4828 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4829 if not self._match_text_seq("VALUE", "FOR"): 4830 self._retreat(self._index - 1) 4831 return None 4832 4833 return self.expression( 4834 exp.NextValueFor, 
4835 this=self._parse_column(), 4836 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4837 ) 4838 4839 def _parse_extract(self) -> exp.Extract: 4840 this = self._parse_function() or self._parse_var() or self._parse_type() 4841 4842 if self._match(TokenType.FROM): 4843 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4844 4845 if not self._match(TokenType.COMMA): 4846 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4847 4848 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4849 4850 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4851 this = self._parse_conjunction() 4852 4853 if not self._match(TokenType.ALIAS): 4854 if self._match(TokenType.COMMA): 4855 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4856 4857 self.raise_error("Expected AS after CAST") 4858 4859 fmt = None 4860 to = self._parse_types() 4861 4862 if self._match(TokenType.FORMAT): 4863 fmt_string = self._parse_string() 4864 fmt = self._parse_at_time_zone(fmt_string) 4865 4866 if not to: 4867 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4868 if to.this in exp.DataType.TEMPORAL_TYPES: 4869 this = self.expression( 4870 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4871 this=this, 4872 format=exp.Literal.string( 4873 format_time( 4874 fmt_string.this if fmt_string else "", 4875 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4876 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4877 ) 4878 ), 4879 ) 4880 4881 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4882 this.set("zone", fmt.args["zone"]) 4883 return this 4884 elif not to: 4885 self.raise_error("Expected TYPE after CAST") 4886 elif isinstance(to, exp.Identifier): 4887 to = exp.DataType.build(to.name, udt=True) 4888 elif to.this == exp.DataType.Type.CHAR: 4889 if self._match(TokenType.CHARACTER_SET): 4890 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4891 4892 return self.expression( 4893 exp.Cast if strict else exp.TryCast, 4894 this=this, 4895 to=to, 4896 format=fmt, 4897 safe=safe, 4898 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4899 ) 4900 4901 def _parse_string_agg(self) -> exp.Expression: 4902 if self._match(TokenType.DISTINCT): 4903 args: t.List[t.Optional[exp.Expression]] = [ 4904 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4905 ] 4906 if self._match(TokenType.COMMA): 4907 args.extend(self._parse_csv(self._parse_conjunction)) 4908 else: 4909 args = self._parse_csv(self._parse_conjunction) # type: ignore 4910 4911 index = self._index 4912 if not self._match(TokenType.R_PAREN) and args: 4913 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4914 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4915 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4916 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4917 4918 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4919 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4920 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
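        # For example (illustrative, not from the original source), roughly:
        #   STRING_AGG(x, ',' ORDER BY y)               -- Postgres / BigQuery
        # parses into exp.GroupConcat and can then be generated as, e.g.,
        #   GROUP_CONCAT(x ORDER BY y SEPARATOR ',')    -- MySQL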
4921 if not self._match_text_seq("WITHIN", "GROUP"): 4922 self._retreat(index) 4923 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4924 4925 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4926 order = self._parse_order(this=seq_get(args, 0)) 4927 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4928 4929 def _parse_convert( 4930 self, strict: bool, safe: t.Optional[bool] = None 4931 ) -> t.Optional[exp.Expression]: 4932 this = self._parse_bitwise() 4933 4934 if self._match(TokenType.USING): 4935 to: t.Optional[exp.Expression] = self.expression( 4936 exp.CharacterSet, this=self._parse_var() 4937 ) 4938 elif self._match(TokenType.COMMA): 4939 to = self._parse_types() 4940 else: 4941 to = None 4942 4943 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4944 4945 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4946 """ 4947 There are generally two variants of the DECODE function: 4948 4949 - DECODE(bin, charset) 4950 - DECODE(expression, search, result [, search, result] ... [, default]) 4951 4952 The second variant will always be parsed into a CASE expression. Note that NULL 4953 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4954 instead of relying on pattern matching. 4955 """ 4956 args = self._parse_csv(self._parse_conjunction) 4957 4958 if len(args) < 3: 4959 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4960 4961 expression, *expressions = args 4962 if not expression: 4963 return None 4964 4965 ifs = [] 4966 for search, result in zip(expressions[::2], expressions[1::2]): 4967 if not search or not result: 4968 return None 4969 4970 if isinstance(search, exp.Literal): 4971 ifs.append( 4972 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4973 ) 4974 elif isinstance(search, exp.Null): 4975 ifs.append( 4976 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4977 ) 4978 else: 4979 cond = exp.or_( 4980 exp.EQ(this=expression.copy(), expression=search), 4981 exp.and_( 4982 exp.Is(this=expression.copy(), expression=exp.Null()), 4983 exp.Is(this=search.copy(), expression=exp.Null()), 4984 copy=False, 4985 ), 4986 copy=False, 4987 ) 4988 ifs.append(exp.If(this=cond, true=result)) 4989 4990 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4991 4992 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4993 self._match_text_seq("KEY") 4994 key = self._parse_column() 4995 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4996 self._match_text_seq("VALUE") 4997 value = self._parse_bitwise() 4998 4999 if not key and not value: 5000 return None 5001 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5002 5003 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5004 if not this or not self._match_text_seq("FORMAT", "JSON"): 5005 return this 5006 5007 return self.expression(exp.FormatJson, this=this) 5008 5009 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5010 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5011 for value in values: 5012 if self._match_text_seq(value, "ON", on): 5013 return f"{value} ON {on}" 5014 5015 return None 5016 5017 @t.overload 5018 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 
5019 5020 @t.overload 5021 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5022 5023 def _parse_json_object(self, agg=False): 5024 star = self._parse_star() 5025 expressions = ( 5026 [star] 5027 if star 5028 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5029 ) 5030 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5031 5032 unique_keys = None 5033 if self._match_text_seq("WITH", "UNIQUE"): 5034 unique_keys = True 5035 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5036 unique_keys = False 5037 5038 self._match_text_seq("KEYS") 5039 5040 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5041 self._parse_type() 5042 ) 5043 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5044 5045 return self.expression( 5046 exp.JSONObjectAgg if agg else exp.JSONObject, 5047 expressions=expressions, 5048 null_handling=null_handling, 5049 unique_keys=unique_keys, 5050 return_type=return_type, 5051 encoding=encoding, 5052 ) 5053 5054 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5055 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5056 if not self._match_text_seq("NESTED"): 5057 this = self._parse_id_var() 5058 kind = self._parse_types(allow_identifiers=False) 5059 nested = None 5060 else: 5061 this = None 5062 kind = None 5063 nested = True 5064 5065 path = self._match_text_seq("PATH") and self._parse_string() 5066 nested_schema = nested and self._parse_json_schema() 5067 5068 return self.expression( 5069 exp.JSONColumnDef, 5070 this=this, 5071 kind=kind, 5072 path=path, 5073 nested_schema=nested_schema, 5074 ) 5075 5076 def _parse_json_schema(self) -> exp.JSONSchema: 5077 self._match_text_seq("COLUMNS") 5078 return self.expression( 5079 exp.JSONSchema, 5080 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5081 ) 5082 5083 def _parse_json_table(self) -> exp.JSONTable: 5084 this = self._parse_format_json(self._parse_bitwise()) 5085 path = self._match(TokenType.COMMA) and self._parse_string() 5086 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5087 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5088 schema = self._parse_json_schema() 5089 5090 return exp.JSONTable( 5091 this=this, 5092 schema=schema, 5093 path=path, 5094 error_handling=error_handling, 5095 empty_handling=empty_handling, 5096 ) 5097 5098 def _parse_match_against(self) -> exp.MatchAgainst: 5099 expressions = self._parse_csv(self._parse_column) 5100 5101 self._match_text_seq(")", "AGAINST", "(") 5102 5103 this = self._parse_string() 5104 5105 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5106 modifier = "IN NATURAL LANGUAGE MODE" 5107 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5108 modifier = f"{modifier} WITH QUERY EXPANSION" 5109 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5110 modifier = "IN BOOLEAN MODE" 5111 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5112 modifier = "WITH QUERY EXPANSION" 5113 else: 5114 modifier = None 5115 5116 return self.expression( 5117 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5118 ) 5119 5120 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5121 def _parse_open_json(self) -> exp.OpenJSON: 5122 this = self._parse_bitwise() 5123 path = self._match(TokenType.COMMA) and self._parse_string() 5124 5125 def _parse_open_json_column_def() -> 
exp.OpenJSONColumnDef: 5126 this = self._parse_field(any_token=True) 5127 kind = self._parse_types() 5128 path = self._parse_string() 5129 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5130 5131 return self.expression( 5132 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5133 ) 5134 5135 expressions = None 5136 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5137 self._match_l_paren() 5138 expressions = self._parse_csv(_parse_open_json_column_def) 5139 5140 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5141 5142 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5143 args = self._parse_csv(self._parse_bitwise) 5144 5145 if self._match(TokenType.IN): 5146 return self.expression( 5147 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5148 ) 5149 5150 if haystack_first: 5151 haystack = seq_get(args, 0) 5152 needle = seq_get(args, 1) 5153 else: 5154 needle = seq_get(args, 0) 5155 haystack = seq_get(args, 1) 5156 5157 return self.expression( 5158 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5159 ) 5160 5161 def _parse_predict(self) -> exp.Predict: 5162 self._match_text_seq("MODEL") 5163 this = self._parse_table() 5164 5165 self._match(TokenType.COMMA) 5166 self._match_text_seq("TABLE") 5167 5168 return self.expression( 5169 exp.Predict, 5170 this=this, 5171 expression=self._parse_table(), 5172 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5173 ) 5174 5175 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5176 args = self._parse_csv(self._parse_table) 5177 return exp.JoinHint(this=func_name.upper(), expressions=args) 5178 5179 def _parse_substring(self) -> exp.Substring: 5180 # Postgres supports the form: substring(string [from int] [for int]) 5181 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5182 5183 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5184 5185 if self._match(TokenType.FROM): 5186 args.append(self._parse_bitwise()) 5187 if self._match(TokenType.FOR): 5188 args.append(self._parse_bitwise()) 5189 5190 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5191 5192 def _parse_trim(self) -> exp.Trim: 5193 # https://www.w3resource.com/sql/character-functions/trim.php 5194 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5195 5196 position = None 5197 collation = None 5198 expression = None 5199 5200 if self._match_texts(self.TRIM_TYPES): 5201 position = self._prev.text.upper() 5202 5203 this = self._parse_bitwise() 5204 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5205 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5206 expression = self._parse_bitwise() 5207 5208 if invert_order: 5209 this, expression = expression, this 5210 5211 if self._match(TokenType.COLLATE): 5212 collation = self._parse_bitwise() 5213 5214 return self.expression( 5215 exp.Trim, this=this, position=position, expression=expression, collation=collation 5216 ) 5217 5218 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5219 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5220 5221 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5222 return self._parse_window(self._parse_id_var(), alias=True) 5223 5224 def _parse_respect_or_ignore_nulls( 5225 self, this: t.Optional[exp.Expression] 5226 ) -> t.Optional[exp.Expression]: 
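        # Wraps `this` in exp.IgnoreNulls / exp.RespectNulls when the trailing keywords
        # are present, e.g. FIRST_VALUE(x) IGNORE NULLS.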
5227 if self._match_text_seq("IGNORE", "NULLS"): 5228 return self.expression(exp.IgnoreNulls, this=this) 5229 if self._match_text_seq("RESPECT", "NULLS"): 5230 return self.expression(exp.RespectNulls, this=this) 5231 return this 5232 5233 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5234 if self._match(TokenType.HAVING): 5235 self._match_texts(("MAX", "MIN")) 5236 max = self._prev.text.upper() != "MIN" 5237 return self.expression( 5238 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5239 ) 5240 5241 return this 5242 5243 def _parse_window( 5244 self, this: t.Optional[exp.Expression], alias: bool = False 5245 ) -> t.Optional[exp.Expression]: 5246 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5247 self._match(TokenType.WHERE) 5248 this = self.expression( 5249 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5250 ) 5251 self._match_r_paren() 5252 5253 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5254 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5255 if self._match_text_seq("WITHIN", "GROUP"): 5256 order = self._parse_wrapped(self._parse_order) 5257 this = self.expression(exp.WithinGroup, this=this, expression=order) 5258 5259 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5260 # Some dialects choose to implement and some do not. 5261 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5262 5263 # There is some code above in _parse_lambda that handles 5264 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5265 5266 # The below changes handle 5267 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5268 5269 # Oracle allows both formats 5270 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5271 # and Snowflake chose to do the same for familiarity 5272 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5273 if isinstance(this, exp.AggFunc): 5274 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5275 5276 if ignore_respect and ignore_respect is not this: 5277 ignore_respect.replace(ignore_respect.this) 5278 this = self.expression(ignore_respect.__class__, this=this) 5279 5280 this = self._parse_respect_or_ignore_nulls(this) 5281 5282 # bigquery select from window x AS (partition by ...) 
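        # e.g. (illustrative): SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)
        # _parse_named_window re-enters this method with alias=True, so AS is consumed
        # here instead of an OVER keyword.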
        # BigQuery allows named windows: SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
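
    # Illustrative example (editorial sketch): a full OVER clause exercises both
    # _parse_partition_and_order and _parse_window_spec. Assuming a recent sqlglot
    # release:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     spec = sqlglot.parse_one(
    #         "SELECT SUM(x) OVER "
    #         "(PARTITION BY y ORDER BY z ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) "
    #         "FROM t"
    #     ).find(exp.WindowSpec)
    #     assert spec.args.get("kind") == "ROWS"
    #     assert spec.args.get("start_side") == "PRECEDING"
    #     assert spec.args.get("end") == "CURRENT ROW"
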
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None
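
    # Illustrative example (editorial sketch): _parse_placeholder is the shared
    # fallback of the small literal parsers above, which is how ":name" parameters
    # surface as exp.Placeholder nodes. With a recent sqlglot release this should hold:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ph = sqlglot.parse_one("SELECT :x").find(exp.Placeholder)
    #     assert ph is not None and ph.name == "x"
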
    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
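
    # Illustrative example (editorial sketch): _parse_csv and _parse_tokens are the
    # two workhorse combinators. _parse_tokens folds a token -> Expression mapping
    # into a left-associative tree, which is why "a AND b AND c" nests as
    # And(And(a, b), c). With a recent sqlglot release:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     where = sqlglot.parse_one("SELECT 1 WHERE a AND b AND c").find(exp.Where)
    #     assert isinstance(where.this, exp.And) and isinstance(where.this.this, exp.And)
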
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
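
    # Illustrative example (editorial sketch): a Postgres-style type change goes
    # through the SET DATA TYPE branch above. With a recent sqlglot release:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     alter = sqlglot.parse_one("ALTER TABLE t ALTER COLUMN c SET DATA TYPE BIGINT")
    #     action = alter.args["actions"][0]
    #     assert isinstance(action, exp.AlterColumn)
    #     assert action.args.get("dtype") is not None
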
    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens
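
    # Illustrative example (editorial sketch): each WHEN branch of a MERGE becomes an
    # exp.When under the Merge node's "expressions". With a recent sqlglot release:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     merge = sqlglot.parse_one(
    #         "MERGE INTO t USING s ON t.id = s.id "
    #         "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #         "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #     )
    #     whens = merge.expressions
    #     assert len(whens) == 2 and all(isinstance(w, exp.When) for w in whens)
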
    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )
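
    # Illustrative example (editorial sketch): _parse_var_from_options matches
    # multi-word options against tables like TRANSACTION_CHARACTERISTICS, so the
    # whole option comes back as a single Var. With a recent sqlglot release:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     stmt = sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
    #     var = stmt.find(exp.Var)
    #     assert var is not None and var.name == "ISOLATION LEVEL READ COMMITTED"
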
    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")
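
    # Illustrative example (editorial sketch): _find_parser walks SHOW_TRIE / SET_TRIE,
    # which the _Parser metaclass builds from the keys of SHOW_PARSERS / SET_PARSERS.
    # The trie helpers behave roughly like this:
    #
    #     from sqlglot.trie import TrieResult, in_trie, new_trie
    #
    #     trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))
    #     result, node = in_trie(trie, ["SHOW"])
    #     assert result == TrieResult.PREFIX  # a longer key may still match
    #     result, node = in_trie(node, ["TABLES"])
    #     assert result == TrieResult.EXISTS  # full key matched
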
    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with the TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # ClickHouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case: if anything is left unconsumed, re-parse the statement as a raw command
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
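
    # Illustrative example (editorial sketch): when LAMBDAS builds an exp.Lambda, the
    # bound names are rewritten by _replace_lambda from Column references into plain
    # identifiers, so later passes cannot mistake them for real columns. With a recent
    # sqlglot release:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     lamb = sqlglot.parse_one(
    #         "SELECT LIST_TRANSFORM(l, x -> x + 1) FROM t", read="duckdb"
    #     ).find(exp.Lambda)
    #     assert lamb is not None
    #     assert not list(lamb.this.find_all(exp.Column))  # "x" is now an Identifier
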
1090 # If this is True and '(' is not found, the keyword will be treated as an identifier 1091 VALUES_FOLLOWED_BY_PAREN = True 1092 1093 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1094 SUPPORTS_IMPLICIT_UNNEST = False 1095 1096 __slots__ = ( 1097 "error_level", 1098 "error_message_context", 1099 "max_errors", 1100 "dialect", 1101 "sql", 1102 "errors", 1103 "_tokens", 1104 "_index", 1105 "_curr", 1106 "_next", 1107 "_prev", 1108 "_prev_comments", 1109 ) 1110 1111 # Autofilled 1112 SHOW_TRIE: t.Dict = {} 1113 SET_TRIE: t.Dict = {} 1114 1115 def __init__( 1116 self, 1117 error_level: t.Optional[ErrorLevel] = None, 1118 error_message_context: int = 100, 1119 max_errors: int = 3, 1120 dialect: DialectType = None, 1121 ): 1122 from sqlglot.dialects import Dialect 1123 1124 self.error_level = error_level or ErrorLevel.IMMEDIATE 1125 self.error_message_context = error_message_context 1126 self.max_errors = max_errors 1127 self.dialect = Dialect.get_or_raise(dialect) 1128 self.reset() 1129 1130 def reset(self): 1131 self.sql = "" 1132 self.errors = [] 1133 self._tokens = [] 1134 self._index = 0 1135 self._curr = None 1136 self._next = None 1137 self._prev = None 1138 self._prev_comments = None 1139 1140 def parse( 1141 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1142 ) -> t.List[t.Optional[exp.Expression]]: 1143 """ 1144 Parses a list of tokens and returns a list of syntax trees, one tree 1145 per parsed SQL statement. 1146 1147 Args: 1148 raw_tokens: The list of tokens. 1149 sql: The original SQL string, used to produce helpful debug messages. 1150 1151 Returns: 1152 The list of the produced syntax trees. 1153 """ 1154 return self._parse( 1155 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1156 ) 1157 1158 def parse_into( 1159 self, 1160 expression_types: exp.IntoType, 1161 raw_tokens: t.List[Token], 1162 sql: t.Optional[str] = None, 1163 ) -> t.List[t.Optional[exp.Expression]]: 1164 """ 1165 Parses a list of tokens into a given Expression type. If a collection of Expression 1166 types is given instead, this method will try to parse the token list into each one 1167 of them, stopping at the first for which the parsing succeeds. 1168 1169 Args: 1170 expression_types: The expression type(s) to try and parse the token list into. 1171 raw_tokens: The list of tokens. 1172 sql: The original SQL string, used to produce helpful debug messages. 1173 1174 Returns: 1175 The target Expression. 
1176 """ 1177 errors = [] 1178 for expression_type in ensure_list(expression_types): 1179 parser = self.EXPRESSION_PARSERS.get(expression_type) 1180 if not parser: 1181 raise TypeError(f"No parser registered for {expression_type}") 1182 1183 try: 1184 return self._parse(parser, raw_tokens, sql) 1185 except ParseError as e: 1186 e.errors[0]["into_expression"] = expression_type 1187 errors.append(e) 1188 1189 raise ParseError( 1190 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1191 errors=merge_errors(errors), 1192 ) from errors[-1] 1193 1194 def _parse( 1195 self, 1196 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1197 raw_tokens: t.List[Token], 1198 sql: t.Optional[str] = None, 1199 ) -> t.List[t.Optional[exp.Expression]]: 1200 self.reset() 1201 self.sql = sql or "" 1202 1203 total = len(raw_tokens) 1204 chunks: t.List[t.List[Token]] = [[]] 1205 1206 for i, token in enumerate(raw_tokens): 1207 if token.token_type == TokenType.SEMICOLON: 1208 if i < total - 1: 1209 chunks.append([]) 1210 else: 1211 chunks[-1].append(token) 1212 1213 expressions = [] 1214 1215 for tokens in chunks: 1216 self._index = -1 1217 self._tokens = tokens 1218 self._advance() 1219 1220 expressions.append(parse_method(self)) 1221 1222 if self._index < len(self._tokens): 1223 self.raise_error("Invalid expression / Unexpected token") 1224 1225 self.check_errors() 1226 1227 return expressions 1228 1229 def check_errors(self) -> None: 1230 """Logs or raises any found errors, depending on the chosen error level setting.""" 1231 if self.error_level == ErrorLevel.WARN: 1232 for error in self.errors: 1233 logger.error(str(error)) 1234 elif self.error_level == ErrorLevel.RAISE and self.errors: 1235 raise ParseError( 1236 concat_messages(self.errors, self.max_errors), 1237 errors=merge_errors(self.errors), 1238 ) 1239 1240 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1241 """ 1242 Appends an error in the list of recorded errors or raises it, depending on the chosen 1243 error level setting. 1244 """ 1245 token = token or self._curr or self._prev or Token.string("") 1246 start = token.start 1247 end = token.end + 1 1248 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1249 highlight = self.sql[start:end] 1250 end_context = self.sql[end : end + self.error_message_context] 1251 1252 error = ParseError.new( 1253 f"{message}. Line {token.line}, Col: {token.col}.\n" 1254 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1255 description=message, 1256 line=token.line, 1257 col=token.col, 1258 start_context=start_context, 1259 highlight=highlight, 1260 end_context=end_context, 1261 ) 1262 1263 if self.error_level == ErrorLevel.IMMEDIATE: 1264 raise error 1265 1266 self.errors.append(error) 1267 1268 def expression( 1269 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1270 ) -> E: 1271 """ 1272 Creates a new, validated Expression. 1273 1274 Args: 1275 exp_class: The expression class to instantiate. 1276 comments: An optional list of comments to attach to the expression. 1277 kwargs: The arguments to set for the expression along with their respective values. 1278 1279 Returns: 1280 The target expression. 
1281 """ 1282 instance = exp_class(**kwargs) 1283 instance.add_comments(comments) if comments else self._add_comments(instance) 1284 return self.validate_expression(instance) 1285 1286 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1287 if expression and self._prev_comments: 1288 expression.add_comments(self._prev_comments) 1289 self._prev_comments = None 1290 1291 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1292 """ 1293 Validates an Expression, making sure that all its mandatory arguments are set. 1294 1295 Args: 1296 expression: The expression to validate. 1297 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1298 1299 Returns: 1300 The validated expression. 1301 """ 1302 if self.error_level != ErrorLevel.IGNORE: 1303 for error_message in expression.error_messages(args): 1304 self.raise_error(error_message) 1305 1306 return expression 1307 1308 def _find_sql(self, start: Token, end: Token) -> str: 1309 return self.sql[start.start : end.end + 1] 1310 1311 def _is_connected(self) -> bool: 1312 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1313 1314 def _advance(self, times: int = 1) -> None: 1315 self._index += times 1316 self._curr = seq_get(self._tokens, self._index) 1317 self._next = seq_get(self._tokens, self._index + 1) 1318 1319 if self._index > 0: 1320 self._prev = self._tokens[self._index - 1] 1321 self._prev_comments = self._prev.comments 1322 else: 1323 self._prev = None 1324 self._prev_comments = None 1325 1326 def _retreat(self, index: int) -> None: 1327 if index != self._index: 1328 self._advance(index - self._index) 1329 1330 def _warn_unsupported(self) -> None: 1331 if len(self._tokens) <= 1: 1332 return 1333 1334 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1335 # interested in emitting a warning for the one being currently processed. 1336 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1337 1338 logger.warning( 1339 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
1340 ) 1341 1342 def _parse_command(self) -> exp.Command: 1343 self._warn_unsupported() 1344 return self.expression( 1345 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1346 ) 1347 1348 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1349 start = self._prev 1350 exists = self._parse_exists() if allow_exists else None 1351 1352 self._match(TokenType.ON) 1353 1354 kind = self._match_set(self.CREATABLES) and self._prev 1355 if not kind: 1356 return self._parse_as_command(start) 1357 1358 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1359 this = self._parse_user_defined_function(kind=kind.token_type) 1360 elif kind.token_type == TokenType.TABLE: 1361 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1362 elif kind.token_type == TokenType.COLUMN: 1363 this = self._parse_column() 1364 else: 1365 this = self._parse_id_var() 1366 1367 self._match(TokenType.IS) 1368 1369 return self.expression( 1370 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1371 ) 1372 1373 def _parse_to_table( 1374 self, 1375 ) -> exp.ToTableProperty: 1376 table = self._parse_table_parts(schema=True) 1377 return self.expression(exp.ToTableProperty, this=table) 1378 1379 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1380 def _parse_ttl(self) -> exp.Expression: 1381 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1382 this = self._parse_bitwise() 1383 1384 if self._match_text_seq("DELETE"): 1385 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1386 if self._match_text_seq("RECOMPRESS"): 1387 return self.expression( 1388 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1389 ) 1390 if self._match_text_seq("TO", "DISK"): 1391 return self.expression( 1392 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1393 ) 1394 if self._match_text_seq("TO", "VOLUME"): 1395 return self.expression( 1396 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1397 ) 1398 1399 return this 1400 1401 expressions = self._parse_csv(_parse_ttl_action) 1402 where = self._parse_where() 1403 group = self._parse_group() 1404 1405 aggregates = None 1406 if group and self._match(TokenType.SET): 1407 aggregates = self._parse_csv(self._parse_set_item) 1408 1409 return self.expression( 1410 exp.MergeTreeTTL, 1411 expressions=expressions, 1412 where=where, 1413 group=group, 1414 aggregates=aggregates, 1415 ) 1416 1417 def _parse_statement(self) -> t.Optional[exp.Expression]: 1418 if self._curr is None: 1419 return None 1420 1421 if self._match_set(self.STATEMENT_PARSERS): 1422 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1423 1424 if self._match_set(Tokenizer.COMMANDS): 1425 return self._parse_command() 1426 1427 expression = self._parse_expression() 1428 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1429 return self._parse_query_modifiers(expression) 1430 1431 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1432 start = self._prev 1433 temporary = self._match(TokenType.TEMPORARY) 1434 materialized = self._match_text_seq("MATERIALIZED") 1435 1436 kind = self._match_set(self.CREATABLES) and self._prev.text 1437 if not kind: 1438 return self._parse_as_command(start) 1439 1440 if_exists = exists or self._parse_exists() 1441 table = self._parse_table_parts( 1442 schema=True, is_db_reference=self._prev.token_type == 
TokenType.SCHEMA 1443 ) 1444 1445 if self._match(TokenType.L_PAREN, advance=False): 1446 expressions = self._parse_wrapped_csv(self._parse_types) 1447 else: 1448 expressions = None 1449 1450 return self.expression( 1451 exp.Drop, 1452 comments=start.comments, 1453 exists=if_exists, 1454 this=table, 1455 expressions=expressions, 1456 kind=kind, 1457 temporary=temporary, 1458 materialized=materialized, 1459 cascade=self._match_text_seq("CASCADE"), 1460 constraints=self._match_text_seq("CONSTRAINTS"), 1461 purge=self._match_text_seq("PURGE"), 1462 ) 1463 1464 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1465 return ( 1466 self._match_text_seq("IF") 1467 and (not not_ or self._match(TokenType.NOT)) 1468 and self._match(TokenType.EXISTS) 1469 ) 1470 1471 def _parse_create(self) -> exp.Create | exp.Command: 1472 # Note: this can't be None because we've matched a statement parser 1473 start = self._prev 1474 comments = self._prev_comments 1475 1476 replace = ( 1477 start.token_type == TokenType.REPLACE 1478 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1479 or self._match_pair(TokenType.OR, TokenType.ALTER) 1480 ) 1481 1482 unique = self._match(TokenType.UNIQUE) 1483 1484 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1485 self._advance() 1486 1487 properties = None 1488 create_token = self._match_set(self.CREATABLES) and self._prev 1489 1490 if not create_token: 1491 # exp.Properties.Location.POST_CREATE 1492 properties = self._parse_properties() 1493 create_token = self._match_set(self.CREATABLES) and self._prev 1494 1495 if not properties or not create_token: 1496 return self._parse_as_command(start) 1497 1498 exists = self._parse_exists(not_=True) 1499 this = None 1500 expression: t.Optional[exp.Expression] = None 1501 indexes = None 1502 no_schema_binding = None 1503 begin = None 1504 end = None 1505 clone = None 1506 1507 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1508 nonlocal properties 1509 if properties and temp_props: 1510 properties.expressions.extend(temp_props.expressions) 1511 elif temp_props: 1512 properties = temp_props 1513 1514 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1515 this = self._parse_user_defined_function(kind=create_token.token_type) 1516 1517 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1518 extend_props(self._parse_properties()) 1519 1520 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1521 1522 if not expression: 1523 if self._match(TokenType.COMMAND): 1524 expression = self._parse_as_command(self._prev) 1525 else: 1526 begin = self._match(TokenType.BEGIN) 1527 return_ = self._match_text_seq("RETURN") 1528 1529 if self._match(TokenType.STRING, advance=False): 1530 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1531 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1532 expression = self._parse_string() 1533 extend_props(self._parse_properties()) 1534 else: 1535 expression = self._parse_statement() 1536 1537 end = self._match_text_seq("END") 1538 1539 if return_: 1540 expression = self.expression(exp.Return, this=expression) 1541 elif create_token.token_type == TokenType.INDEX: 1542 this = self._parse_index(index=self._parse_id_var()) 1543 elif create_token.token_type in self.DB_CREATABLES: 1544 table_parts = self._parse_table_parts( 1545 schema=True, is_db_reference=create_token.token_type == 
TokenType.SCHEMA 1546 ) 1547 1548 # exp.Properties.Location.POST_NAME 1549 self._match(TokenType.COMMA) 1550 extend_props(self._parse_properties(before=True)) 1551 1552 this = self._parse_schema(this=table_parts) 1553 1554 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1555 extend_props(self._parse_properties()) 1556 1557 self._match(TokenType.ALIAS) 1558 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1559 # exp.Properties.Location.POST_ALIAS 1560 extend_props(self._parse_properties()) 1561 1562 if create_token.token_type == TokenType.SEQUENCE: 1563 expression = self._parse_types() 1564 extend_props(self._parse_properties()) 1565 else: 1566 expression = self._parse_ddl_select() 1567 1568 if create_token.token_type == TokenType.TABLE: 1569 # exp.Properties.Location.POST_EXPRESSION 1570 extend_props(self._parse_properties()) 1571 1572 indexes = [] 1573 while True: 1574 index = self._parse_index() 1575 1576 # exp.Properties.Location.POST_INDEX 1577 extend_props(self._parse_properties()) 1578 1579 if not index: 1580 break 1581 else: 1582 self._match(TokenType.COMMA) 1583 indexes.append(index) 1584 elif create_token.token_type == TokenType.VIEW: 1585 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1586 no_schema_binding = True 1587 1588 shallow = self._match_text_seq("SHALLOW") 1589 1590 if self._match_texts(self.CLONE_KEYWORDS): 1591 copy = self._prev.text.lower() == "copy" 1592 clone = self.expression( 1593 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1594 ) 1595 1596 if self._curr: 1597 return self._parse_as_command(start) 1598 1599 return self.expression( 1600 exp.Create, 1601 comments=comments, 1602 this=this, 1603 kind=create_token.text.upper(), 1604 replace=replace, 1605 unique=unique, 1606 expression=expression, 1607 exists=exists, 1608 properties=properties, 1609 indexes=indexes, 1610 no_schema_binding=no_schema_binding, 1611 begin=begin, 1612 end=end, 1613 clone=clone, 1614 ) 1615 1616 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1617 seq = exp.SequenceProperties() 1618 1619 options = [] 1620 index = self._index 1621 1622 while self._curr: 1623 if self._match_text_seq("INCREMENT"): 1624 self._match_text_seq("BY") 1625 self._match_text_seq("=") 1626 seq.set("increment", self._parse_term()) 1627 elif self._match_text_seq("MINVALUE"): 1628 seq.set("minvalue", self._parse_term()) 1629 elif self._match_text_seq("MAXVALUE"): 1630 seq.set("maxvalue", self._parse_term()) 1631 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1632 self._match_text_seq("=") 1633 seq.set("start", self._parse_term()) 1634 elif self._match_text_seq("CACHE"): 1635 # T-SQL allows empty CACHE which is initialized dynamically 1636 seq.set("cache", self._parse_number() or True) 1637 elif self._match_text_seq("OWNED", "BY"): 1638 # "OWNED BY NONE" is the default 1639 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1640 else: 1641 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1642 if opt: 1643 options.append(opt) 1644 else: 1645 break 1646 1647 seq.set("options", options if options else None) 1648 return None if self._index == index else seq 1649 1650 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1651 # only used for teradata currently 1652 self._match(TokenType.COMMA) 1653 1654 kwargs = { 1655 "no": self._match_text_seq("NO"), 1656 "dual": self._match_text_seq("DUAL"), 1657 "before": self._match_text_seq("BEFORE"), 1658 
"default": self._match_text_seq("DEFAULT"), 1659 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1660 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1661 "after": self._match_text_seq("AFTER"), 1662 "minimum": self._match_texts(("MIN", "MINIMUM")), 1663 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1664 } 1665 1666 if self._match_texts(self.PROPERTY_PARSERS): 1667 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1668 try: 1669 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1670 except TypeError: 1671 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1672 1673 return None 1674 1675 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1676 return self._parse_wrapped_csv(self._parse_property) 1677 1678 def _parse_property(self) -> t.Optional[exp.Expression]: 1679 if self._match_texts(self.PROPERTY_PARSERS): 1680 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1681 1682 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1683 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1684 1685 if self._match_text_seq("COMPOUND", "SORTKEY"): 1686 return self._parse_sortkey(compound=True) 1687 1688 if self._match_text_seq("SQL", "SECURITY"): 1689 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1690 1691 index = self._index 1692 key = self._parse_column() 1693 1694 if not self._match(TokenType.EQ): 1695 self._retreat(index) 1696 return self._parse_sequence_properties() 1697 1698 return self.expression( 1699 exp.Property, 1700 this=key.to_dot() if isinstance(key, exp.Column) else key, 1701 value=self._parse_column() or self._parse_var(any_token=True), 1702 ) 1703 1704 def _parse_stored(self) -> exp.FileFormatProperty: 1705 self._match(TokenType.ALIAS) 1706 1707 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1708 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1709 1710 return self.expression( 1711 exp.FileFormatProperty, 1712 this=( 1713 self.expression( 1714 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1715 ) 1716 if input_format or output_format 1717 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1718 ), 1719 ) 1720 1721 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1722 self._match(TokenType.EQ) 1723 self._match(TokenType.ALIAS) 1724 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1725 1726 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1727 properties = [] 1728 while True: 1729 if before: 1730 prop = self._parse_property_before() 1731 else: 1732 prop = self._parse_property() 1733 if not prop: 1734 break 1735 for p in ensure_list(prop): 1736 properties.append(p) 1737 1738 if properties: 1739 return self.expression(exp.Properties, expressions=properties) 1740 1741 return None 1742 1743 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1744 return self.expression( 1745 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1746 ) 1747 1748 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1749 if self._index >= 2: 1750 pre_volatile_token = self._tokens[self._index - 2] 1751 else: 1752 pre_volatile_token = None 1753 1754 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1755 return 
exp.VolatileProperty() 1756 1757 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1758 1759 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1760 self._match_pair(TokenType.EQ, TokenType.ON) 1761 1762 prop = self.expression(exp.WithSystemVersioningProperty) 1763 if self._match(TokenType.L_PAREN): 1764 self._match_text_seq("HISTORY_TABLE", "=") 1765 prop.set("this", self._parse_table_parts()) 1766 1767 if self._match(TokenType.COMMA): 1768 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1769 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1770 1771 self._match_r_paren() 1772 1773 return prop 1774 1775 def _parse_with_property( 1776 self, 1777 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1778 if self._match(TokenType.L_PAREN, advance=False): 1779 return self._parse_wrapped_properties() 1780 1781 if self._match_text_seq("JOURNAL"): 1782 return self._parse_withjournaltable() 1783 1784 if self._match_text_seq("DATA"): 1785 return self._parse_withdata(no=False) 1786 elif self._match_text_seq("NO", "DATA"): 1787 return self._parse_withdata(no=True) 1788 1789 if not self._next: 1790 return None 1791 1792 return self._parse_withisolatedloading() 1793 1794 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1795 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1796 self._match(TokenType.EQ) 1797 1798 user = self._parse_id_var() 1799 self._match(TokenType.PARAMETER) 1800 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1801 1802 if not user or not host: 1803 return None 1804 1805 return exp.DefinerProperty(this=f"{user}@{host}") 1806 1807 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1808 self._match(TokenType.TABLE) 1809 self._match(TokenType.EQ) 1810 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1811 1812 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1813 return self.expression(exp.LogProperty, no=no) 1814 1815 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1816 return self.expression(exp.JournalProperty, **kwargs) 1817 1818 def _parse_checksum(self) -> exp.ChecksumProperty: 1819 self._match(TokenType.EQ) 1820 1821 on = None 1822 if self._match(TokenType.ON): 1823 on = True 1824 elif self._match_text_seq("OFF"): 1825 on = False 1826 1827 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1828 1829 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1830 return self.expression( 1831 exp.Cluster, 1832 expressions=( 1833 self._parse_wrapped_csv(self._parse_ordered) 1834 if wrapped 1835 else self._parse_csv(self._parse_ordered) 1836 ), 1837 ) 1838 1839 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1840 self._match_text_seq("BY") 1841 1842 self._match_l_paren() 1843 expressions = self._parse_csv(self._parse_column) 1844 self._match_r_paren() 1845 1846 if self._match_text_seq("SORTED", "BY"): 1847 self._match_l_paren() 1848 sorted_by = self._parse_csv(self._parse_ordered) 1849 self._match_r_paren() 1850 else: 1851 sorted_by = None 1852 1853 self._match(TokenType.INTO) 1854 buckets = self._parse_number() 1855 self._match_text_seq("BUCKETS") 1856 1857 return self.expression( 1858 exp.ClusteredByProperty, 1859 expressions=expressions, 1860 sorted_by=sorted_by, 1861 buckets=buckets, 1862 ) 1863 1864 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1865 if not 
self._match_text_seq("GRANTS"): 1866 self._retreat(self._index - 1) 1867 return None 1868 1869 return self.expression(exp.CopyGrantsProperty) 1870 1871 def _parse_freespace(self) -> exp.FreespaceProperty: 1872 self._match(TokenType.EQ) 1873 return self.expression( 1874 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1875 ) 1876 1877 def _parse_mergeblockratio( 1878 self, no: bool = False, default: bool = False 1879 ) -> exp.MergeBlockRatioProperty: 1880 if self._match(TokenType.EQ): 1881 return self.expression( 1882 exp.MergeBlockRatioProperty, 1883 this=self._parse_number(), 1884 percent=self._match(TokenType.PERCENT), 1885 ) 1886 1887 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1888 1889 def _parse_datablocksize( 1890 self, 1891 default: t.Optional[bool] = None, 1892 minimum: t.Optional[bool] = None, 1893 maximum: t.Optional[bool] = None, 1894 ) -> exp.DataBlocksizeProperty: 1895 self._match(TokenType.EQ) 1896 size = self._parse_number() 1897 1898 units = None 1899 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1900 units = self._prev.text 1901 1902 return self.expression( 1903 exp.DataBlocksizeProperty, 1904 size=size, 1905 units=units, 1906 default=default, 1907 minimum=minimum, 1908 maximum=maximum, 1909 ) 1910 1911 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1912 self._match(TokenType.EQ) 1913 always = self._match_text_seq("ALWAYS") 1914 manual = self._match_text_seq("MANUAL") 1915 never = self._match_text_seq("NEVER") 1916 default = self._match_text_seq("DEFAULT") 1917 1918 autotemp = None 1919 if self._match_text_seq("AUTOTEMP"): 1920 autotemp = self._parse_schema() 1921 1922 return self.expression( 1923 exp.BlockCompressionProperty, 1924 always=always, 1925 manual=manual, 1926 never=never, 1927 default=default, 1928 autotemp=autotemp, 1929 ) 1930 1931 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1932 no = self._match_text_seq("NO") 1933 concurrent = self._match_text_seq("CONCURRENT") 1934 self._match_text_seq("ISOLATED", "LOADING") 1935 for_all = self._match_text_seq("FOR", "ALL") 1936 for_insert = self._match_text_seq("FOR", "INSERT") 1937 for_none = self._match_text_seq("FOR", "NONE") 1938 return self.expression( 1939 exp.IsolatedLoadingProperty, 1940 no=no, 1941 concurrent=concurrent, 1942 for_all=for_all, 1943 for_insert=for_insert, 1944 for_none=for_none, 1945 ) 1946 1947 def _parse_locking(self) -> exp.LockingProperty: 1948 if self._match(TokenType.TABLE): 1949 kind = "TABLE" 1950 elif self._match(TokenType.VIEW): 1951 kind = "VIEW" 1952 elif self._match(TokenType.ROW): 1953 kind = "ROW" 1954 elif self._match_text_seq("DATABASE"): 1955 kind = "DATABASE" 1956 else: 1957 kind = None 1958 1959 if kind in ("DATABASE", "TABLE", "VIEW"): 1960 this = self._parse_table_parts() 1961 else: 1962 this = None 1963 1964 if self._match(TokenType.FOR): 1965 for_or_in = "FOR" 1966 elif self._match(TokenType.IN): 1967 for_or_in = "IN" 1968 else: 1969 for_or_in = None 1970 1971 if self._match_text_seq("ACCESS"): 1972 lock_type = "ACCESS" 1973 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1974 lock_type = "EXCLUSIVE" 1975 elif self._match_text_seq("SHARE"): 1976 lock_type = "SHARE" 1977 elif self._match_text_seq("READ"): 1978 lock_type = "READ" 1979 elif self._match_text_seq("WRITE"): 1980 lock_type = "WRITE" 1981 elif self._match_text_seq("CHECKSUM"): 1982 lock_type = "CHECKSUM" 1983 else: 1984 lock_type = None 1985 1986 override = self._match_text_seq("OVERRIDE") 
1987 1988 return self.expression( 1989 exp.LockingProperty, 1990 this=this, 1991 kind=kind, 1992 for_or_in=for_or_in, 1993 lock_type=lock_type, 1994 override=override, 1995 ) 1996 1997 def _parse_partition_by(self) -> t.List[exp.Expression]: 1998 if self._match(TokenType.PARTITION_BY): 1999 return self._parse_csv(self._parse_conjunction) 2000 return [] 2001 2002 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2003 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2004 if self._match_text_seq("MINVALUE"): 2005 return exp.var("MINVALUE") 2006 if self._match_text_seq("MAXVALUE"): 2007 return exp.var("MAXVALUE") 2008 return self._parse_bitwise() 2009 2010 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2011 expression = None 2012 from_expressions = None 2013 to_expressions = None 2014 2015 if self._match(TokenType.IN): 2016 this = self._parse_wrapped_csv(self._parse_bitwise) 2017 elif self._match(TokenType.FROM): 2018 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2019 self._match_text_seq("TO") 2020 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2021 elif self._match_text_seq("WITH", "(", "MODULUS"): 2022 this = self._parse_number() 2023 self._match_text_seq(",", "REMAINDER") 2024 expression = self._parse_number() 2025 self._match_r_paren() 2026 else: 2027 self.raise_error("Failed to parse partition bound spec.") 2028 2029 return self.expression( 2030 exp.PartitionBoundSpec, 2031 this=this, 2032 expression=expression, 2033 from_expressions=from_expressions, 2034 to_expressions=to_expressions, 2035 ) 2036 2037 # https://www.postgresql.org/docs/current/sql-createtable.html 2038 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2039 if not self._match_text_seq("OF"): 2040 self._retreat(self._index - 1) 2041 return None 2042 2043 this = self._parse_table(schema=True) 2044 2045 if self._match(TokenType.DEFAULT): 2046 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2047 elif self._match_text_seq("FOR", "VALUES"): 2048 expression = self._parse_partition_bound_spec() 2049 else: 2050 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2051 2052 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2053 2054 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2055 self._match(TokenType.EQ) 2056 return self.expression( 2057 exp.PartitionedByProperty, 2058 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2059 ) 2060 2061 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2062 if self._match_text_seq("AND", "STATISTICS"): 2063 statistics = True 2064 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2065 statistics = False 2066 else: 2067 statistics = None 2068 2069 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2070 2071 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2072 if self._match_text_seq("SQL"): 2073 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2074 return None 2075 2076 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2077 if self._match_text_seq("SQL", "DATA"): 2078 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2079 return None 2080 2081 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2082 if self._match_text_seq("PRIMARY", "INDEX"): 2083 return exp.NoPrimaryIndexProperty() 2084 if self._match_text_seq("SQL"): 2085 
return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2086 return None 2087 2088 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2089 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2090 return exp.OnCommitProperty() 2091 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2092 return exp.OnCommitProperty(delete=True) 2093 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2094 2095 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2096 if self._match_text_seq("SQL", "DATA"): 2097 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2098 return None 2099 2100 def _parse_distkey(self) -> exp.DistKeyProperty: 2101 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2102 2103 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2104 table = self._parse_table(schema=True) 2105 2106 options = [] 2107 while self._match_texts(("INCLUDING", "EXCLUDING")): 2108 this = self._prev.text.upper() 2109 2110 id_var = self._parse_id_var() 2111 if not id_var: 2112 return None 2113 2114 options.append( 2115 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2116 ) 2117 2118 return self.expression(exp.LikeProperty, this=table, expressions=options) 2119 2120 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2121 return self.expression( 2122 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2123 ) 2124 2125 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2126 self._match(TokenType.EQ) 2127 return self.expression( 2128 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2129 ) 2130 2131 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2132 self._match_text_seq("WITH", "CONNECTION") 2133 return self.expression( 2134 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2135 ) 2136 2137 def _parse_returns(self) -> exp.ReturnsProperty: 2138 value: t.Optional[exp.Expression] 2139 is_table = self._match(TokenType.TABLE) 2140 2141 if is_table: 2142 if self._match(TokenType.LT): 2143 value = self.expression( 2144 exp.Schema, 2145 this="TABLE", 2146 expressions=self._parse_csv(self._parse_struct_types), 2147 ) 2148 if not self._match(TokenType.GT): 2149 self.raise_error("Expecting >") 2150 else: 2151 value = self._parse_schema(exp.var("TABLE")) 2152 else: 2153 value = self._parse_types() 2154 2155 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2156 2157 def _parse_describe(self) -> exp.Describe: 2158 kind = self._match_set(self.CREATABLES) and self._prev.text 2159 extended = self._match_text_seq("EXTENDED") 2160 this = self._parse_table(schema=True) 2161 properties = self._parse_properties() 2162 expressions = properties.expressions if properties else None 2163 return self.expression( 2164 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2165 ) 2166 2167 def _parse_insert(self) -> exp.Insert: 2168 comments = ensure_list(self._prev_comments) 2169 hint = self._parse_hint() 2170 overwrite = self._match(TokenType.OVERWRITE) 2171 ignore = self._match(TokenType.IGNORE) 2172 local = self._match_text_seq("LOCAL") 2173 alternative = None 2174 is_function = None 2175 2176 if self._match_text_seq("DIRECTORY"): 2177 this: t.Optional[exp.Expression] = self.expression( 2178 exp.Directory, 2179 this=self._parse_var_or_string(), 2180 local=local, 
2181 row_format=self._parse_row_format(match_row=True), 2182 ) 2183 else: 2184 if self._match(TokenType.OR): 2185 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2186 2187 self._match(TokenType.INTO) 2188 comments += ensure_list(self._prev_comments) 2189 self._match(TokenType.TABLE) 2190 is_function = self._match(TokenType.FUNCTION) 2191 2192 this = self._parse_table(schema=True) if not is_function else self._parse_function() 2193 2194 returning = self._parse_returning() 2195 2196 return self.expression( 2197 exp.Insert, 2198 comments=comments, 2199 hint=hint, 2200 is_function=is_function, 2201 this=this, 2202 by_name=self._match_text_seq("BY", "NAME"), 2203 exists=self._parse_exists(), 2204 partition=self._parse_partition(), 2205 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2206 and self._parse_conjunction(), 2207 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2208 conflict=self._parse_on_conflict(), 2209 returning=returning or self._parse_returning(), 2210 overwrite=overwrite, 2211 alternative=alternative, 2212 ignore=ignore, 2213 ) 2214 2215 def _parse_kill(self) -> exp.Kill: 2216 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2217 2218 return self.expression( 2219 exp.Kill, 2220 this=self._parse_primary(), 2221 kind=kind, 2222 ) 2223 2224 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2225 conflict = self._match_text_seq("ON", "CONFLICT") 2226 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2227 2228 if not conflict and not duplicate: 2229 return None 2230 2231 conflict_keys = None 2232 constraint = None 2233 2234 if conflict: 2235 if self._match_text_seq("ON", "CONSTRAINT"): 2236 constraint = self._parse_id_var() 2237 elif self._match(TokenType.L_PAREN): 2238 conflict_keys = self._parse_csv(self._parse_id_var) 2239 self._match_r_paren() 2240 2241 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2242 if self._prev.token_type == TokenType.UPDATE: 2243 self._match(TokenType.SET) 2244 expressions = self._parse_csv(self._parse_equality) 2245 else: 2246 expressions = None 2247 2248 return self.expression( 2249 exp.OnConflict, 2250 duplicate=duplicate, 2251 expressions=expressions, 2252 action=action, 2253 conflict_keys=conflict_keys, 2254 constraint=constraint, 2255 ) 2256 2257 def _parse_returning(self) -> t.Optional[exp.Returning]: 2258 if not self._match(TokenType.RETURNING): 2259 return None 2260 return self.expression( 2261 exp.Returning, 2262 expressions=self._parse_csv(self._parse_expression), 2263 into=self._match(TokenType.INTO) and self._parse_table_part(), 2264 ) 2265 2266 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2267 if not self._match(TokenType.FORMAT): 2268 return None 2269 return self._parse_row_format() 2270 2271 def _parse_row_format( 2272 self, match_row: bool = False 2273 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2274 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2275 return None 2276 2277 if self._match_text_seq("SERDE"): 2278 this = self._parse_string() 2279 2280 serde_properties = None 2281 if self._match(TokenType.SERDE_PROPERTIES): 2282 serde_properties = self.expression( 2283 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2284 ) 2285 2286 return self.expression( 2287 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2288 ) 2289 2290 
self._match_text_seq("DELIMITED") 2291 2292 kwargs = {} 2293 2294 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2295 kwargs["fields"] = self._parse_string() 2296 if self._match_text_seq("ESCAPED", "BY"): 2297 kwargs["escaped"] = self._parse_string() 2298 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2299 kwargs["collection_items"] = self._parse_string() 2300 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2301 kwargs["map_keys"] = self._parse_string() 2302 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2303 kwargs["lines"] = self._parse_string() 2304 if self._match_text_seq("NULL", "DEFINED", "AS"): 2305 kwargs["null"] = self._parse_string() 2306 2307 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2308 2309 def _parse_load(self) -> exp.LoadData | exp.Command: 2310 if self._match_text_seq("DATA"): 2311 local = self._match_text_seq("LOCAL") 2312 self._match_text_seq("INPATH") 2313 inpath = self._parse_string() 2314 overwrite = self._match(TokenType.OVERWRITE) 2315 self._match_pair(TokenType.INTO, TokenType.TABLE) 2316 2317 return self.expression( 2318 exp.LoadData, 2319 this=self._parse_table(schema=True), 2320 local=local, 2321 overwrite=overwrite, 2322 inpath=inpath, 2323 partition=self._parse_partition(), 2324 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2325 serde=self._match_text_seq("SERDE") and self._parse_string(), 2326 ) 2327 return self._parse_as_command(self._prev) 2328 2329 def _parse_delete(self) -> exp.Delete: 2330 # This handles MySQL's "Multiple-Table Syntax" 2331 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2332 tables = None 2333 comments = self._prev_comments 2334 if not self._match(TokenType.FROM, advance=False): 2335 tables = self._parse_csv(self._parse_table) or None 2336 2337 returning = self._parse_returning() 2338 2339 return self.expression( 2340 exp.Delete, 2341 comments=comments, 2342 tables=tables, 2343 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2344 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2345 where=self._parse_where(), 2346 returning=returning or self._parse_returning(), 2347 limit=self._parse_limit(), 2348 ) 2349 2350 def _parse_update(self) -> exp.Update: 2351 comments = self._prev_comments 2352 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2353 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2354 returning = self._parse_returning() 2355 return self.expression( 2356 exp.Update, 2357 comments=comments, 2358 **{ # type: ignore 2359 "this": this, 2360 "expressions": expressions, 2361 "from": self._parse_from(joins=True), 2362 "where": self._parse_where(), 2363 "returning": returning or self._parse_returning(), 2364 "order": self._parse_order(), 2365 "limit": self._parse_limit(), 2366 }, 2367 ) 2368 2369 def _parse_uncache(self) -> exp.Uncache: 2370 if not self._match(TokenType.TABLE): 2371 self.raise_error("Expecting TABLE after UNCACHE") 2372 2373 return self.expression( 2374 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2375 ) 2376 2377 def _parse_cache(self) -> exp.Cache: 2378 lazy = self._match_text_seq("LAZY") 2379 self._match(TokenType.TABLE) 2380 table = self._parse_table(schema=True) 2381 2382 options = [] 2383 if self._match_text_seq("OPTIONS"): 2384 self._match_l_paren() 2385 k = self._parse_string() 2386 self._match(TokenType.EQ) 2387 v = self._parse_string() 2388 options = [k, v] 
2389 self._match_r_paren() 2390 2391 self._match(TokenType.ALIAS) 2392 return self.expression( 2393 exp.Cache, 2394 this=table, 2395 lazy=lazy, 2396 options=options, 2397 expression=self._parse_select(nested=True), 2398 ) 2399 2400 def _parse_partition(self) -> t.Optional[exp.Partition]: 2401 if not self._match(TokenType.PARTITION): 2402 return None 2403 2404 return self.expression( 2405 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2406 ) 2407 2408 def _parse_value(self) -> exp.Tuple: 2409 if self._match(TokenType.L_PAREN): 2410 expressions = self._parse_csv(self._parse_expression) 2411 self._match_r_paren() 2412 return self.expression(exp.Tuple, expressions=expressions) 2413 2414 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2415 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2416 2417 def _parse_projections(self) -> t.List[exp.Expression]: 2418 return self._parse_expressions() 2419 2420 def _parse_select( 2421 self, 2422 nested: bool = False, 2423 table: bool = False, 2424 parse_subquery_alias: bool = True, 2425 parse_set_operation: bool = True, 2426 ) -> t.Optional[exp.Expression]: 2427 cte = self._parse_with() 2428 2429 if cte: 2430 this = self._parse_statement() 2431 2432 if not this: 2433 self.raise_error("Failed to parse any statement following CTE") 2434 return cte 2435 2436 if "with" in this.arg_types: 2437 this.set("with", cte) 2438 else: 2439 self.raise_error(f"{this.key} does not support CTE") 2440 this = cte 2441 2442 return this 2443 2444 # duckdb supports leading with FROM x 2445 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2446 2447 if self._match(TokenType.SELECT): 2448 comments = self._prev_comments 2449 2450 hint = self._parse_hint() 2451 all_ = self._match(TokenType.ALL) 2452 distinct = self._match_set(self.DISTINCT_TOKENS) 2453 2454 kind = ( 2455 self._match(TokenType.ALIAS) 2456 and self._match_texts(("STRUCT", "VALUE")) 2457 and self._prev.text.upper() 2458 ) 2459 2460 if distinct: 2461 distinct = self.expression( 2462 exp.Distinct, 2463 on=self._parse_value() if self._match(TokenType.ON) else None, 2464 ) 2465 2466 if all_ and distinct: 2467 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2468 2469 limit = self._parse_limit(top=True) 2470 projections = self._parse_projections() 2471 2472 this = self.expression( 2473 exp.Select, 2474 kind=kind, 2475 hint=hint, 2476 distinct=distinct, 2477 expressions=projections, 2478 limit=limit, 2479 ) 2480 this.comments = comments 2481 2482 into = self._parse_into() 2483 if into: 2484 this.set("into", into) 2485 2486 if not from_: 2487 from_ = self._parse_from() 2488 2489 if from_: 2490 this.set("from", from_) 2491 2492 this = self._parse_query_modifiers(this) 2493 elif (table or nested) and self._match(TokenType.L_PAREN): 2494 if self._match(TokenType.PIVOT): 2495 this = self._parse_simplified_pivot() 2496 elif self._match(TokenType.FROM): 2497 this = exp.select("*").from_( 2498 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2499 ) 2500 else: 2501 this = ( 2502 self._parse_table() 2503 if table 2504 else self._parse_select(nested=True, parse_set_operation=False) 2505 ) 2506 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2507 2508 self._match_r_paren() 2509 2510 # We return early here so that the UNION isn't attached to the subquery by the 2511 # following call to _parse_set_operations, but instead becomes the parent node 2512 return 
self._parse_subquery(this, parse_alias=parse_subquery_alias) 2513 elif self._match(TokenType.VALUES, advance=False): 2514 this = self._parse_derived_table_values() 2515 elif from_: 2516 this = exp.select("*").from_(from_.this, copy=False) 2517 else: 2518 this = None 2519 2520 if parse_set_operation: 2521 return self._parse_set_operations(this) 2522 return this 2523 2524 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2525 if not skip_with_token and not self._match(TokenType.WITH): 2526 return None 2527 2528 comments = self._prev_comments 2529 recursive = self._match(TokenType.RECURSIVE) 2530 2531 expressions = [] 2532 while True: 2533 expressions.append(self._parse_cte()) 2534 2535 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2536 break 2537 else: 2538 self._match(TokenType.WITH) 2539 2540 return self.expression( 2541 exp.With, comments=comments, expressions=expressions, recursive=recursive 2542 ) 2543 2544 def _parse_cte(self) -> exp.CTE: 2545 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2546 if not alias or not alias.this: 2547 self.raise_error("Expected CTE to have alias") 2548 2549 self._match(TokenType.ALIAS) 2550 return self.expression( 2551 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2552 ) 2553 2554 def _parse_table_alias( 2555 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2556 ) -> t.Optional[exp.TableAlias]: 2557 any_token = self._match(TokenType.ALIAS) 2558 alias = ( 2559 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2560 or self._parse_string_as_identifier() 2561 ) 2562 2563 index = self._index 2564 if self._match(TokenType.L_PAREN): 2565 columns = self._parse_csv(self._parse_function_parameter) 2566 self._match_r_paren() if columns else self._retreat(index) 2567 else: 2568 columns = None 2569 2570 if not alias and not columns: 2571 return None 2572 2573 return self.expression(exp.TableAlias, this=alias, columns=columns) 2574 2575 def _parse_subquery( 2576 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2577 ) -> t.Optional[exp.Subquery]: 2578 if not this: 2579 return None 2580 2581 return self.expression( 2582 exp.Subquery, 2583 this=this, 2584 pivots=self._parse_pivots(), 2585 alias=self._parse_table_alias() if parse_alias else None, 2586 ) 2587 2588 def _implicit_unnests_to_explicit(self, this: E) -> E: 2589 from sqlglot.optimizer.normalize_identifiers import ( 2590 normalize_identifiers as _norm, 2591 ) 2592 2593 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2594 for i, join in enumerate(this.args.get("joins") or []): 2595 table = join.this 2596 normalized_table = table.copy() 2597 normalized_table.meta["maybe_column"] = True 2598 normalized_table = _norm(normalized_table, dialect=self.dialect) 2599 2600 if isinstance(table, exp.Table) and not join.args.get("on"): 2601 if normalized_table.parts[0].name in refs: 2602 table_as_column = table.to_column() 2603 unnest = exp.Unnest(expressions=[table_as_column]) 2604 2605 # Table.to_column creates a parent Alias node that we want to convert to 2606 # a TableAlias and attach to the Unnest, so it matches the parser's output 2607 if isinstance(table.args.get("alias"), exp.TableAlias): 2608 table_as_column.replace(table_as_column.this) 2609 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2610 2611 table.replace(unnest) 2612 2613 refs.add(normalized_table.alias_or_name) 2614 2615 return this 2616 2617 def 
_parse_query_modifiers( 2618 self, this: t.Optional[exp.Expression] 2619 ) -> t.Optional[exp.Expression]: 2620 if isinstance(this, (exp.Query, exp.Table)): 2621 for join in iter(self._parse_join, None): 2622 this.append("joins", join) 2623 for lateral in iter(self._parse_lateral, None): 2624 this.append("laterals", lateral) 2625 2626 while True: 2627 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2628 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2629 key, expression = parser(self) 2630 2631 if expression: 2632 this.set(key, expression) 2633 if key == "limit": 2634 offset = expression.args.pop("offset", None) 2635 2636 if offset: 2637 offset = exp.Offset(expression=offset) 2638 this.set("offset", offset) 2639 2640 limit_by_expressions = expression.expressions 2641 expression.set("expressions", None) 2642 offset.set("expressions", limit_by_expressions) 2643 continue 2644 break 2645 2646 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2647 this = self._implicit_unnests_to_explicit(this) 2648 2649 return this 2650 2651 def _parse_hint(self) -> t.Optional[exp.Hint]: 2652 if self._match(TokenType.HINT): 2653 hints = [] 2654 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2655 hints.extend(hint) 2656 2657 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2658 self.raise_error("Expected */ after HINT") 2659 2660 return self.expression(exp.Hint, expressions=hints) 2661 2662 return None 2663 2664 def _parse_into(self) -> t.Optional[exp.Into]: 2665 if not self._match(TokenType.INTO): 2666 return None 2667 2668 temp = self._match(TokenType.TEMPORARY) 2669 unlogged = self._match_text_seq("UNLOGGED") 2670 self._match(TokenType.TABLE) 2671 2672 return self.expression( 2673 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2674 ) 2675 2676 def _parse_from( 2677 self, joins: bool = False, skip_from_token: bool = False 2678 ) -> t.Optional[exp.From]: 2679 if not skip_from_token and not self._match(TokenType.FROM): 2680 return None 2681 2682 return self.expression( 2683 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2684 ) 2685 2686 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2687 if not self._match(TokenType.MATCH_RECOGNIZE): 2688 return None 2689 2690 self._match_l_paren() 2691 2692 partition = self._parse_partition_by() 2693 order = self._parse_order() 2694 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2695 2696 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2697 rows = exp.var("ONE ROW PER MATCH") 2698 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2699 text = "ALL ROWS PER MATCH" 2700 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2701 text += " SHOW EMPTY MATCHES" 2702 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2703 text += " OMIT EMPTY MATCHES" 2704 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2705 text += " WITH UNMATCHED ROWS" 2706 rows = exp.var(text) 2707 else: 2708 rows = None 2709 2710 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2711 text = "AFTER MATCH SKIP" 2712 if self._match_text_seq("PAST", "LAST", "ROW"): 2713 text += " PAST LAST ROW" 2714 elif self._match_text_seq("TO", "NEXT", "ROW"): 2715 text += " TO NEXT ROW" 2716 elif self._match_text_seq("TO", "FIRST"): 2717 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2718 elif self._match_text_seq("TO", "LAST"): 2719 text += f" TO LAST {self._advance_any().text}" # type: ignore 
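            # note (illustrative, added): the matched SKIP keywords are accumulated as raw text
            # and wrapped in an exp.Var below (e.g. "AFTER MATCH SKIP TO NEXT ROW"), so the
            # clause round-trips verbatim instead of being modeled as structured arguments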
2720 after = exp.var(text) 2721 else: 2722 after = None 2723 2724 if self._match_text_seq("PATTERN"): 2725 self._match_l_paren() 2726 2727 if not self._curr: 2728 self.raise_error("Expecting )", self._curr) 2729 2730 paren = 1 2731 start = self._curr 2732 2733 while self._curr and paren > 0: 2734 if self._curr.token_type == TokenType.L_PAREN: 2735 paren += 1 2736 if self._curr.token_type == TokenType.R_PAREN: 2737 paren -= 1 2738 2739 end = self._prev 2740 self._advance() 2741 2742 if paren > 0: 2743 self.raise_error("Expecting )", self._curr) 2744 2745 pattern = exp.var(self._find_sql(start, end)) 2746 else: 2747 pattern = None 2748 2749 define = ( 2750 self._parse_csv(self._parse_name_as_expression) 2751 if self._match_text_seq("DEFINE") 2752 else None 2753 ) 2754 2755 self._match_r_paren() 2756 2757 return self.expression( 2758 exp.MatchRecognize, 2759 partition_by=partition, 2760 order=order, 2761 measures=measures, 2762 rows=rows, 2763 after=after, 2764 pattern=pattern, 2765 define=define, 2766 alias=self._parse_table_alias(), 2767 ) 2768 2769 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2770 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2771 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2772 cross_apply = False 2773 2774 if cross_apply is not None: 2775 this = self._parse_select(table=True) 2776 view = None 2777 outer = None 2778 elif self._match(TokenType.LATERAL): 2779 this = self._parse_select(table=True) 2780 view = self._match(TokenType.VIEW) 2781 outer = self._match(TokenType.OUTER) 2782 else: 2783 return None 2784 2785 if not this: 2786 this = ( 2787 self._parse_unnest() 2788 or self._parse_function() 2789 or self._parse_id_var(any_token=False) 2790 ) 2791 2792 while self._match(TokenType.DOT): 2793 this = exp.Dot( 2794 this=this, 2795 expression=self._parse_function() or self._parse_id_var(any_token=False), 2796 ) 2797 2798 if view: 2799 table = self._parse_id_var(any_token=False) 2800 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2801 table_alias: t.Optional[exp.TableAlias] = self.expression( 2802 exp.TableAlias, this=table, columns=columns 2803 ) 2804 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2805 # We move the alias from the lateral's child node to the lateral itself 2806 table_alias = this.args["alias"].pop() 2807 else: 2808 table_alias = self._parse_table_alias() 2809 2810 return self.expression( 2811 exp.Lateral, 2812 this=this, 2813 view=view, 2814 outer=outer, 2815 alias=table_alias, 2816 cross_apply=cross_apply, 2817 ) 2818 2819 def _parse_join_parts( 2820 self, 2821 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2822 return ( 2823 self._match_set(self.JOIN_METHODS) and self._prev, 2824 self._match_set(self.JOIN_SIDES) and self._prev, 2825 self._match_set(self.JOIN_KINDS) and self._prev, 2826 ) 2827 2828 def _parse_join( 2829 self, skip_join_token: bool = False, parse_bracket: bool = False 2830 ) -> t.Optional[exp.Join]: 2831 if self._match(TokenType.COMMA): 2832 return self.expression(exp.Join, this=self._parse_table()) 2833 2834 index = self._index 2835 method, side, kind = self._parse_join_parts() 2836 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2837 join = self._match(TokenType.JOIN) 2838 2839 if not skip_join_token and not join: 2840 self._retreat(index) 2841 kind = None 2842 method = None 2843 side = None 2844 2845 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2846 
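        # note (added): T-SQL's OUTER APPLY / CROSS APPLY are detected here without consuming
        # tokens (the trailing False disables advancing); the APPLY pair itself is consumed by
        # _parse_lateral via the _parse_table call below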
cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2847 2848 if not skip_join_token and not join and not outer_apply and not cross_apply: 2849 return None 2850 2851 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2852 2853 if method: 2854 kwargs["method"] = method.text 2855 if side: 2856 kwargs["side"] = side.text 2857 if kind: 2858 kwargs["kind"] = kind.text 2859 if hint: 2860 kwargs["hint"] = hint 2861 2862 if self._match(TokenType.ON): 2863 kwargs["on"] = self._parse_conjunction() 2864 elif self._match(TokenType.USING): 2865 kwargs["using"] = self._parse_wrapped_id_vars() 2866 elif not (kind and kind.token_type == TokenType.CROSS): 2867 index = self._index 2868 join = self._parse_join() 2869 2870 if join and self._match(TokenType.ON): 2871 kwargs["on"] = self._parse_conjunction() 2872 elif join and self._match(TokenType.USING): 2873 kwargs["using"] = self._parse_wrapped_id_vars() 2874 else: 2875 join = None 2876 self._retreat(index) 2877 2878 kwargs["this"].set("joins", [join] if join else None) 2879 2880 comments = [c for token in (method, side, kind) if token for c in token.comments] 2881 return self.expression(exp.Join, comments=comments, **kwargs) 2882 2883 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2884 this = self._parse_conjunction() 2885 2886 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2887 return this 2888 2889 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2890 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2891 2892 return this 2893 2894 def _parse_index_params(self) -> exp.IndexParameters: 2895 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2896 2897 if self._match(TokenType.L_PAREN, advance=False): 2898 columns = self._parse_wrapped_csv(self._parse_with_operator) 2899 else: 2900 columns = None 2901 2902 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2903 partition_by = self._parse_partition_by() 2904 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 2905 tablespace = ( 2906 self._parse_var(any_token=True) 2907 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 2908 else None 2909 ) 2910 where = self._parse_where() 2911 2912 return self.expression( 2913 exp.IndexParameters, 2914 using=using, 2915 columns=columns, 2916 include=include, 2917 partition_by=partition_by, 2918 where=where, 2919 with_storage=with_storage, 2920 tablespace=tablespace, 2921 ) 2922 2923 def _parse_index( 2924 self, 2925 index: t.Optional[exp.Expression] = None, 2926 ) -> t.Optional[exp.Index]: 2927 if index: 2928 unique = None 2929 primary = None 2930 amp = None 2931 2932 self._match(TokenType.ON) 2933 self._match(TokenType.TABLE) # hive 2934 table = self._parse_table_parts(schema=True) 2935 else: 2936 unique = self._match(TokenType.UNIQUE) 2937 primary = self._match_text_seq("PRIMARY") 2938 amp = self._match_text_seq("AMP") 2939 2940 if not self._match(TokenType.INDEX): 2941 return None 2942 2943 index = self._parse_id_var() 2944 table = None 2945 2946 params = self._parse_index_params() 2947 2948 return self.expression( 2949 exp.Index, 2950 this=index, 2951 table=table, 2952 unique=unique, 2953 primary=primary, 2954 amp=amp, 2955 params=params, 2956 ) 2957 2958 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2959 hints: t.List[exp.Expression] = [] 2960 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2961 # 
https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2962 hints.append( 2963 self.expression( 2964 exp.WithTableHint, 2965 expressions=self._parse_csv( 2966 lambda: self._parse_function() or self._parse_var(any_token=True) 2967 ), 2968 ) 2969 ) 2970 self._match_r_paren() 2971 else: 2972 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2973 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2974 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2975 2976 self._match_texts(("INDEX", "KEY")) 2977 if self._match(TokenType.FOR): 2978 hint.set("target", self._advance_any() and self._prev.text.upper()) 2979 2980 hint.set("expressions", self._parse_wrapped_id_vars()) 2981 hints.append(hint) 2982 2983 return hints or None 2984 2985 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2986 return ( 2987 (not schema and self._parse_function(optional_parens=False)) 2988 or self._parse_id_var(any_token=False) 2989 or self._parse_string_as_identifier() 2990 or self._parse_placeholder() 2991 ) 2992 2993 def _parse_table_parts( 2994 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 2995 ) -> exp.Table: 2996 catalog = None 2997 db = None 2998 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2999 3000 while self._match(TokenType.DOT): 3001 if catalog: 3002 # This allows nesting the table in arbitrarily many dot expressions if needed 3003 table = self.expression( 3004 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3005 ) 3006 else: 3007 catalog = db 3008 db = table 3009 # "" used for tsql FROM a..b case 3010 table = self._parse_table_part(schema=schema) or "" 3011 3012 if ( 3013 wildcard 3014 and self._is_connected() 3015 and (isinstance(table, exp.Identifier) or not table) 3016 and self._match(TokenType.STAR) 3017 ): 3018 if isinstance(table, exp.Identifier): 3019 table.args["this"] += "*" 3020 else: 3021 table = exp.Identifier(this="*") 3022 3023 if is_db_reference: 3024 catalog = db 3025 db = table 3026 table = None 3027 3028 if not table and not is_db_reference: 3029 self.raise_error(f"Expected table name but got {self._curr}") 3030 if not db and is_db_reference: 3031 self.raise_error(f"Expected database name but got {self._curr}") 3032 3033 return self.expression( 3034 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 3035 ) 3036 3037 def _parse_table( 3038 self, 3039 schema: bool = False, 3040 joins: bool = False, 3041 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3042 parse_bracket: bool = False, 3043 is_db_reference: bool = False, 3044 ) -> t.Optional[exp.Expression]: 3045 lateral = self._parse_lateral() 3046 if lateral: 3047 return lateral 3048 3049 unnest = self._parse_unnest() 3050 if unnest: 3051 return unnest 3052 3053 values = self._parse_derived_table_values() 3054 if values: 3055 return values 3056 3057 subquery = self._parse_select(table=True) 3058 if subquery: 3059 if not subquery.args.get("pivots"): 3060 subquery.set("pivots", self._parse_pivots()) 3061 return subquery 3062 3063 bracket = parse_bracket and self._parse_bracket(None) 3064 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3065 3066 only = self._match(TokenType.ONLY) 3067 3068 this = t.cast( 3069 exp.Expression, 3070 bracket 3071 or self._parse_bracket( 3072 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3073 ), 3074 ) 3075 3076 if only: 3077 this.set("only", only) 3078 
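        # note (added): ONLY is Postgres inheritance syntax, restricting the scan to the named
        # table and excluding its children, e.g. SELECT * FROM ONLY parent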
3079 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3080 self._match_text_seq("*") 3081 3082 if schema: 3083 return self._parse_schema(this=this) 3084 3085 version = self._parse_version() 3086 3087 if version: 3088 this.set("version", version) 3089 3090 if self.dialect.ALIAS_POST_TABLESAMPLE: 3091 table_sample = self._parse_table_sample() 3092 3093 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3094 if alias: 3095 this.set("alias", alias) 3096 3097 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3098 return self.expression( 3099 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3100 ) 3101 3102 this.set("hints", self._parse_table_hints()) 3103 3104 if not this.args.get("pivots"): 3105 this.set("pivots", self._parse_pivots()) 3106 3107 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3108 table_sample = self._parse_table_sample() 3109 3110 if table_sample: 3111 table_sample.set("this", this) 3112 this = table_sample 3113 3114 if joins: 3115 for join in iter(self._parse_join, None): 3116 this.append("joins", join) 3117 3118 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3119 this.set("ordinality", True) 3120 this.set("alias", self._parse_table_alias()) 3121 3122 return this 3123 3124 def _parse_version(self) -> t.Optional[exp.Version]: 3125 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3126 this = "TIMESTAMP" 3127 elif self._match(TokenType.VERSION_SNAPSHOT): 3128 this = "VERSION" 3129 else: 3130 return None 3131 3132 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3133 kind = self._prev.text.upper() 3134 start = self._parse_bitwise() 3135 self._match_texts(("TO", "AND")) 3136 end = self._parse_bitwise() 3137 expression: t.Optional[exp.Expression] = self.expression( 3138 exp.Tuple, expressions=[start, end] 3139 ) 3140 elif self._match_text_seq("CONTAINED", "IN"): 3141 kind = "CONTAINED IN" 3142 expression = self.expression( 3143 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3144 ) 3145 elif self._match(TokenType.ALL): 3146 kind = "ALL" 3147 expression = None 3148 else: 3149 self._match_text_seq("AS", "OF") 3150 kind = "AS OF" 3151 expression = self._parse_type() 3152 3153 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3154 3155 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3156 if not self._match(TokenType.UNNEST): 3157 return None 3158 3159 expressions = self._parse_wrapped_csv(self._parse_equality) 3160 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3161 3162 alias = self._parse_table_alias() if with_alias else None 3163 3164 if alias: 3165 if self.dialect.UNNEST_COLUMN_ONLY: 3166 if alias.args.get("columns"): 3167 self.raise_error("Unexpected extra column alias in unnest.") 3168 3169 alias.set("columns", [alias.this]) 3170 alias.set("this", None) 3171 3172 columns = alias.args.get("columns") or [] 3173 if offset and len(expressions) < len(columns): 3174 offset = columns.pop() 3175 3176 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3177 self._match(TokenType.ALIAS) 3178 offset = self._parse_id_var( 3179 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3180 ) or exp.to_identifier("offset") 3181 3182 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3183 3184 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3185 is_derived = self._match_pair(TokenType.L_PAREN, 
TokenType.VALUES) 3186 if not is_derived and not self._match_text_seq("VALUES"): 3187 return None 3188 3189 expressions = self._parse_csv(self._parse_value) 3190 alias = self._parse_table_alias() 3191 3192 if is_derived: 3193 self._match_r_paren() 3194 3195 return self.expression( 3196 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3197 ) 3198 3199 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3200 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3201 as_modifier and self._match_text_seq("USING", "SAMPLE") 3202 ): 3203 return None 3204 3205 bucket_numerator = None 3206 bucket_denominator = None 3207 bucket_field = None 3208 percent = None 3209 size = None 3210 seed = None 3211 3212 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3213 matched_l_paren = self._match(TokenType.L_PAREN) 3214 3215 if self.TABLESAMPLE_CSV: 3216 num = None 3217 expressions = self._parse_csv(self._parse_primary) 3218 else: 3219 expressions = None 3220 num = ( 3221 self._parse_factor() 3222 if self._match(TokenType.NUMBER, advance=False) 3223 else self._parse_primary() or self._parse_placeholder() 3224 ) 3225 3226 if self._match_text_seq("BUCKET"): 3227 bucket_numerator = self._parse_number() 3228 self._match_text_seq("OUT", "OF") 3229 bucket_denominator = self._parse_number() 3230 self._match(TokenType.ON) 3231 bucket_field = self._parse_field() 3232 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3233 percent = num 3234 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3235 size = num 3236 else: 3237 percent = num 3238 3239 if matched_l_paren: 3240 self._match_r_paren() 3241 3242 if self._match(TokenType.L_PAREN): 3243 method = self._parse_var(upper=True) 3244 seed = self._match(TokenType.COMMA) and self._parse_number() 3245 self._match_r_paren() 3246 elif self._match_texts(("SEED", "REPEATABLE")): 3247 seed = self._parse_wrapped(self._parse_number) 3248 3249 return self.expression( 3250 exp.TableSample, 3251 expressions=expressions, 3252 method=method, 3253 bucket_numerator=bucket_numerator, 3254 bucket_denominator=bucket_denominator, 3255 bucket_field=bucket_field, 3256 percent=percent, 3257 size=size, 3258 seed=seed, 3259 ) 3260 3261 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3262 return list(iter(self._parse_pivot, None)) or None 3263 3264 # https://duckdb.org/docs/sql/statements/pivot 3265 def _parse_simplified_pivot(self) -> exp.Pivot: 3266 def _parse_on() -> t.Optional[exp.Expression]: 3267 this = self._parse_bitwise() 3268 return self._parse_in(this) if self._match(TokenType.IN) else this 3269 3270 this = self._parse_table() 3271 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3272 using = self._match(TokenType.USING) and self._parse_csv( 3273 lambda: self._parse_alias(self._parse_function()) 3274 ) 3275 group = self._parse_group() 3276 return self.expression( 3277 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3278 ) 3279 3280 def _parse_pivot_in(self) -> exp.In: 3281 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3282 this = self._parse_conjunction() 3283 3284 self._match(TokenType.ALIAS) 3285 alias = self._parse_field() 3286 if alias: 3287 return self.expression(exp.PivotAlias, this=this, alias=alias) 3288 3289 return this 3290 3291 value = self._parse_column() 3292 3293 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3294 self.raise_error("Expecting IN (") 3295
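        # illustrative example (added; assumes a PIVOT-capable dialect such as Snowflake):
        # this parses the IN list of a PIVOT's FOR clause, e.g.
        # FOR col IN ('a' AS first, 'b' AS second) -- each entry may carry a PivotAlias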
3296 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3297 3298 self._match_r_paren() 3299 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3300 3301 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3302 index = self._index 3303 include_nulls = None 3304 3305 if self._match(TokenType.PIVOT): 3306 unpivot = False 3307 elif self._match(TokenType.UNPIVOT): 3308 unpivot = True 3309 3310 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3311 if self._match_text_seq("INCLUDE", "NULLS"): 3312 include_nulls = True 3313 elif self._match_text_seq("EXCLUDE", "NULLS"): 3314 include_nulls = False 3315 else: 3316 return None 3317 3318 expressions = [] 3319 3320 if not self._match(TokenType.L_PAREN): 3321 self._retreat(index) 3322 return None 3323 3324 if unpivot: 3325 expressions = self._parse_csv(self._parse_column) 3326 else: 3327 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3328 3329 if not expressions: 3330 self.raise_error("Failed to parse PIVOT's aggregation list") 3331 3332 if not self._match(TokenType.FOR): 3333 self.raise_error("Expecting FOR") 3334 3335 field = self._parse_pivot_in() 3336 3337 self._match_r_paren() 3338 3339 pivot = self.expression( 3340 exp.Pivot, 3341 expressions=expressions, 3342 field=field, 3343 unpivot=unpivot, 3344 include_nulls=include_nulls, 3345 ) 3346 3347 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3348 pivot.set("alias", self._parse_table_alias()) 3349 3350 if not unpivot: 3351 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3352 3353 columns: t.List[exp.Expression] = [] 3354 for fld in pivot.args["field"].expressions: 3355 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3356 for name in names: 3357 if self.PREFIXED_PIVOT_COLUMNS: 3358 name = f"{name}_{field_name}" if name else field_name 3359 else: 3360 name = f"{field_name}_{name}" if name else field_name 3361 3362 columns.append(exp.to_identifier(name)) 3363 3364 pivot.set("columns", columns) 3365 3366 return pivot 3367 3368 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3369 return [agg.alias for agg in aggregations] 3370 3371 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3372 if not skip_where_token and not self._match(TokenType.PREWHERE): 3373 return None 3374 3375 return self.expression( 3376 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3377 ) 3378 3379 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3380 if not skip_where_token and not self._match(TokenType.WHERE): 3381 return None 3382 3383 return self.expression( 3384 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3385 ) 3386 3387 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3388 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3389 return None 3390 3391 elements = defaultdict(list) 3392 3393 if self._match(TokenType.ALL): 3394 return self.expression(exp.Group, all=True) 3395 3396 while True: 3397 expressions = self._parse_csv(self._parse_conjunction) 3398 if expressions: 3399 elements["expressions"].extend(expressions) 3400 3401 grouping_sets = self._parse_grouping_sets() 3402 if grouping_sets: 3403 elements["grouping_sets"].extend(grouping_sets) 3404 3405 rollup = None 3406 cube = None 3407 totals = None 3408 3409 
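            # note (added): both MySQL-style WITH ROLLUP / WITH CUBE and standard
            # ROLLUP(...) / CUBE(...) are handled below; the index is saved so a bare WITH
            # belonging to a different clause can be rewound at the bottom of the loop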
index = self._index 3410 with_ = self._match(TokenType.WITH) 3411 if self._match(TokenType.ROLLUP): 3412 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3413 elements["rollup"].extend(ensure_list(rollup)) 3414 3415 if self._match(TokenType.CUBE): 3416 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3417 elements["cube"].extend(ensure_list(cube)) 3418 3419 if self._match_text_seq("TOTALS"): 3420 totals = True 3421 elements["totals"] = True # type: ignore 3422 3423 if not (grouping_sets or rollup or cube or totals): 3424 if with_: 3425 self._retreat(index) 3426 break 3427 3428 return self.expression(exp.Group, **elements) # type: ignore 3429 3430 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3431 if not self._match(TokenType.GROUPING_SETS): 3432 return None 3433 3434 return self._parse_wrapped_csv(self._parse_grouping_set) 3435 3436 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3437 if self._match(TokenType.L_PAREN): 3438 grouping_set = self._parse_csv(self._parse_column) 3439 self._match_r_paren() 3440 return self.expression(exp.Tuple, expressions=grouping_set) 3441 3442 return self._parse_column() 3443 3444 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3445 if not skip_having_token and not self._match(TokenType.HAVING): 3446 return None 3447 return self.expression(exp.Having, this=self._parse_conjunction()) 3448 3449 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3450 if not self._match(TokenType.QUALIFY): 3451 return None 3452 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3453 3454 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3455 if skip_start_token: 3456 start = None 3457 elif self._match(TokenType.START_WITH): 3458 start = self._parse_conjunction() 3459 else: 3460 return None 3461 3462 self._match(TokenType.CONNECT_BY) 3463 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3464 exp.Prior, this=self._parse_bitwise() 3465 ) 3466 connect = self._parse_conjunction() 3467 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3468 3469 if not start and self._match(TokenType.START_WITH): 3470 start = self._parse_conjunction() 3471 3472 return self.expression(exp.Connect, start=start, connect=connect) 3473 3474 def _parse_name_as_expression(self) -> exp.Alias: 3475 return self.expression( 3476 exp.Alias, 3477 alias=self._parse_id_var(any_token=True), 3478 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3479 ) 3480 3481 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3482 if self._match_text_seq("INTERPOLATE"): 3483 return self._parse_wrapped_csv(self._parse_name_as_expression) 3484 return None 3485 3486 def _parse_order( 3487 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3488 ) -> t.Optional[exp.Expression]: 3489 siblings = None 3490 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3491 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3492 return this 3493 3494 siblings = True 3495 3496 return self.expression( 3497 exp.Order, 3498 this=this, 3499 expressions=self._parse_csv(self._parse_ordered), 3500 interpolate=self._parse_interpolate(), 3501 siblings=siblings, 3502 ) 3503 3504 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3505 if not self._match(token): 3506 return None 3507 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3508 3509 def _parse_ordered( 3510 
self, parse_method: t.Optional[t.Callable] = None 3511 ) -> t.Optional[exp.Ordered]: 3512 this = parse_method() if parse_method else self._parse_conjunction() 3513 if not this: 3514 return None 3515 3516 asc = self._match(TokenType.ASC) 3517 desc = self._match(TokenType.DESC) or (asc and False) 3518 3519 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3520 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3521 3522 nulls_first = is_nulls_first or False 3523 explicitly_null_ordered = is_nulls_first or is_nulls_last 3524 3525 if ( 3526 not explicitly_null_ordered 3527 and ( 3528 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3529 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3530 ) 3531 and self.dialect.NULL_ORDERING != "nulls_are_last" 3532 ): 3533 nulls_first = True 3534 3535 if self._match_text_seq("WITH", "FILL"): 3536 with_fill = self.expression( 3537 exp.WithFill, 3538 **{ # type: ignore 3539 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3540 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3541 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3542 }, 3543 ) 3544 else: 3545 with_fill = None 3546 3547 return self.expression( 3548 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3549 ) 3550 3551 def _parse_limit( 3552 self, 3553 this: t.Optional[exp.Expression] = None, 3554 top: bool = False, 3555 skip_limit_token: bool = False, 3556 ) -> t.Optional[exp.Expression]: 3557 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3558 comments = self._prev_comments 3559 if top: 3560 limit_paren = self._match(TokenType.L_PAREN) 3561 expression = self._parse_term() if limit_paren else self._parse_number() 3562 3563 if limit_paren: 3564 self._match_r_paren() 3565 else: 3566 expression = self._parse_term() 3567 3568 if self._match(TokenType.COMMA): 3569 offset = expression 3570 expression = self._parse_term() 3571 else: 3572 offset = None 3573 3574 limit_exp = self.expression( 3575 exp.Limit, 3576 this=this, 3577 expression=expression, 3578 offset=offset, 3579 comments=comments, 3580 expressions=self._parse_limit_by(), 3581 ) 3582 3583 return limit_exp 3584 3585 if self._match(TokenType.FETCH): 3586 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3587 direction = self._prev.text.upper() if direction else "FIRST" 3588 3589 count = self._parse_field(tokens=self.FETCH_TOKENS) 3590 percent = self._match(TokenType.PERCENT) 3591 3592 self._match_set((TokenType.ROW, TokenType.ROWS)) 3593 3594 only = self._match_text_seq("ONLY") 3595 with_ties = self._match_text_seq("WITH", "TIES") 3596 3597 if only and with_ties: 3598 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3599 3600 return self.expression( 3601 exp.Fetch, 3602 direction=direction, 3603 count=count, 3604 percent=percent, 3605 with_ties=with_ties, 3606 ) 3607 3608 return this 3609 3610 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3611 if not self._match(TokenType.OFFSET): 3612 return this 3613 3614 count = self._parse_term() 3615 self._match_set((TokenType.ROW, TokenType.ROWS)) 3616 3617 return self.expression( 3618 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3619 ) 3620 3621 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3622 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3623 3624 def _parse_locks(self) -> t.List[exp.Lock]: 3625 locks = 
[] 3626 while True: 3627 if self._match_text_seq("FOR", "UPDATE"): 3628 update = True 3629 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3630 "LOCK", "IN", "SHARE", "MODE" 3631 ): 3632 update = False 3633 else: 3634 break 3635 3636 expressions = None 3637 if self._match_text_seq("OF"): 3638 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3639 3640 wait: t.Optional[bool | exp.Expression] = None 3641 if self._match_text_seq("NOWAIT"): 3642 wait = True 3643 elif self._match_text_seq("WAIT"): 3644 wait = self._parse_primary() 3645 elif self._match_text_seq("SKIP", "LOCKED"): 3646 wait = False 3647 3648 locks.append( 3649 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3650 ) 3651 3652 return locks 3653 3654 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3655 while this and self._match_set(self.SET_OPERATIONS): 3656 token_type = self._prev.token_type 3657 3658 if token_type == TokenType.UNION: 3659 operation = exp.Union 3660 elif token_type == TokenType.EXCEPT: 3661 operation = exp.Except 3662 else: 3663 operation = exp.Intersect 3664 3665 comments = self._prev.comments 3666 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3667 by_name = self._match_text_seq("BY", "NAME") 3668 expression = self._parse_select(nested=True, parse_set_operation=False) 3669 3670 this = self.expression( 3671 operation, 3672 comments=comments, 3673 this=this, 3674 distinct=distinct, 3675 by_name=by_name, 3676 expression=expression, 3677 ) 3678 3679 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3680 expression = this.expression 3681 3682 if expression: 3683 for arg in self.UNION_MODIFIERS: 3684 expr = expression.args.get(arg) 3685 if expr: 3686 this.set(arg, expr.pop()) 3687 3688 return this 3689 3690 def _parse_expression(self) -> t.Optional[exp.Expression]: 3691 return self._parse_alias(self._parse_conjunction()) 3692 3693 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3694 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3695 3696 def _parse_equality(self) -> t.Optional[exp.Expression]: 3697 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3698 3699 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3700 return self._parse_tokens(self._parse_range, self.COMPARISON) 3701 3702 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3703 this = this or self._parse_bitwise() 3704 negate = self._match(TokenType.NOT) 3705 3706 if self._match_set(self.RANGE_PARSERS): 3707 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3708 if not expression: 3709 return this 3710 3711 this = expression 3712 elif self._match(TokenType.ISNULL): 3713 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3714 3715 # Postgres supports ISNULL and NOTNULL for conditions. 
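        # (added) e.g. "x ISNULL" / "x NOTNULL" are shorthand for IS NULL / IS NOT NULL;
        # NOTNULL is rewritten as NOT (x IS NULL) just below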
3716 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3717 if self._match(TokenType.NOTNULL): 3718 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3719 this = self.expression(exp.Not, this=this) 3720 3721 if negate: 3722 this = self.expression(exp.Not, this=this) 3723 3724 if self._match(TokenType.IS): 3725 this = self._parse_is(this) 3726 3727 return this 3728 3729 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3730 index = self._index - 1 3731 negate = self._match(TokenType.NOT) 3732 3733 if self._match_text_seq("DISTINCT", "FROM"): 3734 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3735 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3736 3737 expression = self._parse_null() or self._parse_boolean() 3738 if not expression: 3739 self._retreat(index) 3740 return None 3741 3742 this = self.expression(exp.Is, this=this, expression=expression) 3743 return self.expression(exp.Not, this=this) if negate else this 3744 3745 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3746 unnest = self._parse_unnest(with_alias=False) 3747 if unnest: 3748 this = self.expression(exp.In, this=this, unnest=unnest) 3749 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3750 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3751 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3752 3753 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3754 this = self.expression(exp.In, this=this, query=expressions[0]) 3755 else: 3756 this = self.expression(exp.In, this=this, expressions=expressions) 3757 3758 if matched_l_paren: 3759 self._match_r_paren(this) 3760 elif not self._match(TokenType.R_BRACKET, expression=this): 3761 self.raise_error("Expecting ]") 3762 else: 3763 this = self.expression(exp.In, this=this, field=self._parse_field()) 3764 3765 return this 3766 3767 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3768 low = self._parse_bitwise() 3769 self._match(TokenType.AND) 3770 high = self._parse_bitwise() 3771 return self.expression(exp.Between, this=this, low=low, high=high) 3772 3773 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3774 if not self._match(TokenType.ESCAPE): 3775 return this 3776 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3777 3778 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3779 index = self._index 3780 3781 if not self._match(TokenType.INTERVAL) and match_interval: 3782 return None 3783 3784 if self._match(TokenType.STRING, advance=False): 3785 this = self._parse_primary() 3786 else: 3787 this = self._parse_term() 3788 3789 if not this or ( 3790 isinstance(this, exp.Column) 3791 and not this.table 3792 and not this.this.quoted 3793 and this.name.upper() == "IS" 3794 ): 3795 self._retreat(index) 3796 return None 3797 3798 unit = self._parse_function() or ( 3799 not self._match(TokenType.ALIAS, advance=False) 3800 and self._parse_var(any_token=True, upper=True) 3801 ) 3802 3803 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3804 # each INTERVAL expression into this canonical form so it's easy to transpile 3805 if this and this.is_number: 3806 this = exp.Literal.string(this.name) 3807 elif this and this.is_string: 3808 parts = this.name.split() 3809 3810 if len(parts) == 2: 3811 if unit: 3812 # This is not 
actually a unit, it's something else (e.g. a "window side") 3813 unit = None 3814 self._retreat(self._index - 1) 3815 3816 this = exp.Literal.string(parts[0]) 3817 unit = self.expression(exp.Var, this=parts[1].upper()) 3818 3819 return self.expression(exp.Interval, this=this, unit=unit) 3820 3821 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3822 this = self._parse_term() 3823 3824 while True: 3825 if self._match_set(self.BITWISE): 3826 this = self.expression( 3827 self.BITWISE[self._prev.token_type], 3828 this=this, 3829 expression=self._parse_term(), 3830 ) 3831 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3832 this = self.expression( 3833 exp.DPipe, 3834 this=this, 3835 expression=self._parse_term(), 3836 safe=not self.dialect.STRICT_STRING_CONCAT, 3837 ) 3838 elif self._match(TokenType.DQMARK): 3839 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3840 elif self._match_pair(TokenType.LT, TokenType.LT): 3841 this = self.expression( 3842 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3843 ) 3844 elif self._match_pair(TokenType.GT, TokenType.GT): 3845 this = self.expression( 3846 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3847 ) 3848 else: 3849 break 3850 3851 return this 3852 3853 def _parse_term(self) -> t.Optional[exp.Expression]: 3854 return self._parse_tokens(self._parse_factor, self.TERM) 3855 3856 def _parse_factor(self) -> t.Optional[exp.Expression]: 3857 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3858 this = parse_method() 3859 3860 while self._match_set(self.FACTOR): 3861 this = self.expression( 3862 self.FACTOR[self._prev.token_type], 3863 this=this, 3864 comments=self._prev_comments, 3865 expression=parse_method(), 3866 ) 3867 if isinstance(this, exp.Div): 3868 this.args["typed"] = self.dialect.TYPED_DIVISION 3869 this.args["safe"] = self.dialect.SAFE_DIVISION 3870 3871 return this 3872 3873 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3874 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3875 3876 def _parse_unary(self) -> t.Optional[exp.Expression]: 3877 if self._match_set(self.UNARY_PARSERS): 3878 return self.UNARY_PARSERS[self._prev.token_type](self) 3879 return self._parse_at_time_zone(self._parse_type()) 3880 3881 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3882 interval = parse_interval and self._parse_interval() 3883 if interval: 3884 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 3885 while True: 3886 index = self._index 3887 self._match(TokenType.PLUS) 3888 3889 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3890 self._retreat(index) 3891 break 3892 3893 interval = self.expression( # type: ignore 3894 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3895 ) 3896 3897 return interval 3898 3899 index = self._index 3900 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3901 this = self._parse_column() 3902 3903 if data_type: 3904 if isinstance(this, exp.Literal): 3905 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3906 if parser: 3907 return parser(self, this, data_type) 3908 return self.expression(exp.Cast, this=this, to=data_type) 3909 if not data_type.expressions: 3910 self._retreat(index) 3911 return self._parse_column() 3912 return self._parse_column_ops(data_type) 3913 3914 return this and self._parse_column_ops(this) 3915 3916 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3917 this = self._parse_type() 3918 if not this: 3919 return None 3920 3921 return self.expression( 3922 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3923 ) 3924 3925 def _parse_types( 3926 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3927 ) -> t.Optional[exp.Expression]: 3928 index = self._index 3929 3930 prefix = self._match_text_seq("SYSUDTLIB", ".") 3931 3932 if not self._match_set(self.TYPE_TOKENS): 3933 identifier = allow_identifiers and self._parse_id_var( 3934 any_token=False, tokens=(TokenType.VAR,) 3935 ) 3936 if identifier: 3937 tokens = self.dialect.tokenize(identifier.name) 3938 3939 if len(tokens) != 1: 3940 self.raise_error("Unexpected identifier", self._prev) 3941 3942 if tokens[0].token_type in self.TYPE_TOKENS: 3943 self._prev = tokens[0] 3944 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3945 type_name = identifier.name 3946 3947 while self._match(TokenType.DOT): 3948 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3949 3950 return exp.DataType.build(type_name, udt=True) 3951 else: 3952 self._retreat(self._index - 1) 3953 return None 3954 else: 3955 return None 3956 3957 type_token = self._prev.token_type 3958 3959 if type_token == TokenType.PSEUDO_TYPE: 3960 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3961 3962 if type_token == TokenType.OBJECT_IDENTIFIER: 3963 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3964 3965 nested = type_token in self.NESTED_TYPE_TOKENS 3966 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3967 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3968 expressions = None 3969 maybe_func = False 3970 3971 if self._match(TokenType.L_PAREN): 3972 if is_struct: 3973 expressions = self._parse_csv(self._parse_struct_types) 3974 elif nested: 3975 expressions = self._parse_csv( 3976 lambda: self._parse_types( 3977 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3978 ) 3979 ) 3980 elif type_token in self.ENUM_TYPE_TOKENS: 3981 expressions = self._parse_csv(self._parse_equality) 3982 elif is_aggregate: 3983 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3984 any_token=False, tokens=(TokenType.VAR,) 3985 ) 3986 if not func_or_ident or not self._match(TokenType.COMMA): 3987 return None 3988 expressions = self._parse_csv( 3989 lambda: self._parse_types( 3990 check_func=check_func, schema=schema, 
allow_identifiers=allow_identifiers 3991 ) 3992 ) 3993 expressions.insert(0, func_or_ident) 3994 else: 3995 expressions = self._parse_csv(self._parse_type_size) 3996 3997 if not expressions or not self._match(TokenType.R_PAREN): 3998 self._retreat(index) 3999 return None 4000 4001 maybe_func = True 4002 4003 this: t.Optional[exp.Expression] = None 4004 values: t.Optional[t.List[exp.Expression]] = None 4005 4006 if nested and self._match(TokenType.LT): 4007 if is_struct: 4008 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4009 else: 4010 expressions = self._parse_csv( 4011 lambda: self._parse_types( 4012 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4013 ) 4014 ) 4015 4016 if not self._match(TokenType.GT): 4017 self.raise_error("Expecting >") 4018 4019 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4020 values = self._parse_csv(self._parse_conjunction) 4021 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4022 4023 if type_token in self.TIMESTAMPS: 4024 if self._match_text_seq("WITH", "TIME", "ZONE"): 4025 maybe_func = False 4026 tz_type = ( 4027 exp.DataType.Type.TIMETZ 4028 if type_token in self.TIMES 4029 else exp.DataType.Type.TIMESTAMPTZ 4030 ) 4031 this = exp.DataType(this=tz_type, expressions=expressions) 4032 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4033 maybe_func = False 4034 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4035 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4036 maybe_func = False 4037 elif type_token == TokenType.INTERVAL: 4038 unit = self._parse_var() 4039 4040 if self._match_text_seq("TO"): 4041 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 4042 else: 4043 span = None 4044 4045 if span or not unit: 4046 this = self.expression( 4047 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 4048 ) 4049 else: 4050 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4051 4052 if maybe_func and check_func: 4053 index2 = self._index 4054 peek = self._parse_string() 4055 4056 if not peek: 4057 self._retreat(index) 4058 return None 4059 4060 self._retreat(index2) 4061 4062 if not this: 4063 if self._match_text_seq("UNSIGNED"): 4064 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4065 if not unsigned_type_token: 4066 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4067 4068 type_token = unsigned_type_token or type_token 4069 4070 this = exp.DataType( 4071 this=exp.DataType.Type[type_token.value], 4072 expressions=expressions, 4073 nested=nested, 4074 values=values, 4075 prefix=prefix, 4076 ) 4077 4078 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4079 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4080 4081 return this 4082 4083 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4084 index = self._index 4085 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4086 self._match(TokenType.COLON) 4087 column_def = self._parse_column_def(this) 4088 4089 if type_required and ( 4090 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4091 ): 4092 self._retreat(index) 4093 return self._parse_types() 4094 4095 return column_def 4096 4097 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4098 if not self._match_text_seq("AT", "TIME", "ZONE"): 4099 
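            # (added) no AT TIME ZONE suffix follows, so the expression is returned unchanged;
            # otherwise e.g. "ts AT TIME ZONE 'UTC'" becomes exp.AtTimeZone below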
return this 4100 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4101 4102 def _parse_column(self) -> t.Optional[exp.Expression]: 4103 this = self._parse_column_reference() 4104 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4105 4106 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4107 this = self._parse_field() 4108 if ( 4109 not this 4110 and self._match(TokenType.VALUES, advance=False) 4111 and self.VALUES_FOLLOWED_BY_PAREN 4112 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4113 ): 4114 this = self._parse_id_var() 4115 4116 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4117 4118 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4119 this = self._parse_bracket(this) 4120 4121 while self._match_set(self.COLUMN_OPERATORS): 4122 op_token = self._prev.token_type 4123 op = self.COLUMN_OPERATORS.get(op_token) 4124 4125 if op_token == TokenType.DCOLON: 4126 field = self._parse_types() 4127 if not field: 4128 self.raise_error("Expected type") 4129 elif op and self._curr: 4130 field = self._parse_column_reference() 4131 else: 4132 field = self._parse_field(anonymous_func=True, any_token=True) 4133 4134 if isinstance(field, exp.Func) and this: 4135 # bigquery allows function calls like x.y.count(...) 4136 # SAFE.SUBSTR(...) 4137 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4138 this = exp.replace_tree( 4139 this, 4140 lambda n: ( 4141 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4142 if n.table 4143 else n.this 4144 ) 4145 if isinstance(n, exp.Column) 4146 else n, 4147 ) 4148 4149 if op: 4150 this = op(self, this, field) 4151 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4152 this = self.expression( 4153 exp.Column, 4154 this=field, 4155 table=this.this, 4156 db=this.args.get("table"), 4157 catalog=this.args.get("db"), 4158 ) 4159 else: 4160 this = self.expression(exp.Dot, this=this, expression=field) 4161 this = self._parse_bracket(this) 4162 return this 4163 4164 def _parse_primary(self) -> t.Optional[exp.Expression]: 4165 if self._match_set(self.PRIMARY_PARSERS): 4166 token_type = self._prev.token_type 4167 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4168 4169 if token_type == TokenType.STRING: 4170 expressions = [primary] 4171 while self._match(TokenType.STRING): 4172 expressions.append(exp.Literal.string(self._prev.text)) 4173 4174 if len(expressions) > 1: 4175 return self.expression(exp.Concat, expressions=expressions) 4176 4177 return primary 4178 4179 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4180 return exp.Literal.number(f"0.{self._prev.text}") 4181 4182 if self._match(TokenType.L_PAREN): 4183 comments = self._prev_comments 4184 query = self._parse_select() 4185 4186 if query: 4187 expressions = [query] 4188 else: 4189 expressions = self._parse_expressions() 4190 4191 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4192 4193 if isinstance(this, exp.UNWRAPPED_QUERIES): 4194 this = self._parse_set_operations( 4195 self._parse_subquery(this=this, parse_alias=False) 4196 ) 4197 elif isinstance(this, exp.Subquery): 4198 this = self._parse_subquery( 4199 this=self._parse_set_operations(this), parse_alias=False 4200 ) 4201 elif len(expressions) > 1: 4202 this = self.expression(exp.Tuple, expressions=expressions) 4203 else: 4204 this = self.expression(exp.Paren, 
this=this) 4205 4206 if this: 4207 this.add_comments(comments) 4208 4209 self._match_r_paren(expression=this) 4210 return this 4211 4212 return None 4213 4214 def _parse_field( 4215 self, 4216 any_token: bool = False, 4217 tokens: t.Optional[t.Collection[TokenType]] = None, 4218 anonymous_func: bool = False, 4219 ) -> t.Optional[exp.Expression]: 4220 return ( 4221 self._parse_primary() 4222 or self._parse_function(anonymous=anonymous_func) 4223 or self._parse_id_var(any_token=any_token, tokens=tokens) 4224 ) 4225 4226 def _parse_function( 4227 self, 4228 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4229 anonymous: bool = False, 4230 optional_parens: bool = True, 4231 ) -> t.Optional[exp.Expression]: 4232 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4233 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4234 fn_syntax = False 4235 if ( 4236 self._match(TokenType.L_BRACE, advance=False) 4237 and self._next 4238 and self._next.text.upper() == "FN" 4239 ): 4240 self._advance(2) 4241 fn_syntax = True 4242 4243 func = self._parse_function_call( 4244 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4245 ) 4246 4247 if fn_syntax: 4248 self._match(TokenType.R_BRACE) 4249 4250 return func 4251 4252 def _parse_function_call( 4253 self, 4254 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4255 anonymous: bool = False, 4256 optional_parens: bool = True, 4257 ) -> t.Optional[exp.Expression]: 4258 if not self._curr: 4259 return None 4260 4261 comments = self._curr.comments 4262 token_type = self._curr.token_type 4263 this = self._curr.text 4264 upper = this.upper() 4265 4266 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4267 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4268 self._advance() 4269 return self._parse_window(parser(self)) 4270 4271 if not self._next or self._next.token_type != TokenType.L_PAREN: 4272 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4273 self._advance() 4274 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4275 4276 return None 4277 4278 if token_type not in self.FUNC_TOKENS: 4279 return None 4280 4281 self._advance(2) 4282 4283 parser = self.FUNCTION_PARSERS.get(upper) 4284 if parser and not anonymous: 4285 this = parser(self) 4286 else: 4287 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4288 4289 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4290 this = self.expression(subquery_predicate, this=self._parse_select()) 4291 self._match_r_paren() 4292 return this 4293 4294 if functions is None: 4295 functions = self.FUNCTIONS 4296 4297 function = functions.get(upper) 4298 4299 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4300 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4301 4302 if alias: 4303 args = self._kv_to_prop_eq(args) 4304 4305 if function and not anonymous: 4306 if "dialect" in function.__code__.co_varnames: 4307 func = function(args, dialect=self.dialect) 4308 else: 4309 func = function(args) 4310 4311 func = self.validate_expression(func, args) 4312 if not self.dialect.NORMALIZE_FUNCTIONS: 4313 func.meta["name"] = this 4314 4315 this = func 4316 else: 4317 if token_type == TokenType.IDENTIFIER: 4318 this = exp.Identifier(this=this, quoted=True) 4319 this = self.expression(exp.Anonymous, this=this, expressions=args) 4320 4321 if isinstance(this, exp.Expression): 4322 this.add_comments(comments) 4323 4324 
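        # note (added): function names without a registered builder fall through to
        # exp.Anonymous above, so unknown functions still round-trip, e.g.
        # parse_one("MY_UDF(1, 2)") yields Anonymous(this="MY_UDF", expressions=[...])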
self._match_r_paren(this) 4325 return self._parse_window(this) 4326 4327 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4328 transformed = [] 4329 4330 for e in expressions: 4331 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4332 if isinstance(e, exp.Alias): 4333 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4334 4335 if not isinstance(e, exp.PropertyEQ): 4336 e = self.expression( 4337 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4338 ) 4339 4340 if isinstance(e.this, exp.Column): 4341 e.this.replace(e.this.this) 4342 4343 transformed.append(e) 4344 4345 return transformed 4346 4347 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4348 return self._parse_column_def(self._parse_id_var()) 4349 4350 def _parse_user_defined_function( 4351 self, kind: t.Optional[TokenType] = None 4352 ) -> t.Optional[exp.Expression]: 4353 this = self._parse_id_var() 4354 4355 while self._match(TokenType.DOT): 4356 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4357 4358 if not self._match(TokenType.L_PAREN): 4359 return this 4360 4361 expressions = self._parse_csv(self._parse_function_parameter) 4362 self._match_r_paren() 4363 return self.expression( 4364 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4365 ) 4366 4367 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4368 literal = self._parse_primary() 4369 if literal: 4370 return self.expression(exp.Introducer, this=token.text, expression=literal) 4371 4372 return self.expression(exp.Identifier, this=token.text) 4373 4374 def _parse_session_parameter(self) -> exp.SessionParameter: 4375 kind = None 4376 this = self._parse_id_var() or self._parse_primary() 4377 4378 if this and self._match(TokenType.DOT): 4379 kind = this.name 4380 this = self._parse_var() or self._parse_primary() 4381 4382 return self.expression(exp.SessionParameter, this=this, kind=kind) 4383 4384 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4385 index = self._index 4386 4387 if self._match(TokenType.L_PAREN): 4388 expressions = t.cast( 4389 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4390 ) 4391 4392 if not self._match(TokenType.R_PAREN): 4393 self._retreat(index) 4394 else: 4395 expressions = [self._parse_id_var()] 4396 4397 if self._match_set(self.LAMBDAS): 4398 return self.LAMBDAS[self._prev.token_type](self, expressions) 4399 4400 self._retreat(index) 4401 4402 this: t.Optional[exp.Expression] 4403 4404 if self._match(TokenType.DISTINCT): 4405 this = self.expression( 4406 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4407 ) 4408 else: 4409 this = self._parse_select_or_expression(alias=alias) 4410 4411 return self._parse_limit( 4412 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4413 ) 4414 4415 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4416 index = self._index 4417 4418 if not self.errors: 4419 try: 4420 if self._parse_select(nested=True): 4421 return this 4422 except ParseError: 4423 pass 4424 finally: 4425 self.errors.clear() 4426 self._retreat(index) 4427 4428 if not self._match(TokenType.L_PAREN): 4429 return this 4430 4431 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4432 4433 self._match_r_paren() 4434 return self.expression(exp.Schema, this=this, expressions=args) 
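    # illustrative sketch (added; not part of the original source; assumes the default
    # dialect): _parse_schema backs wrapped column-definition lists, e.g.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   create = sqlglot.parse_one("CREATE TABLE t (a INT, b TEXT)")
    #   schema = create.this  # exp.Schema wrapping the table and its column defs
    #   assert isinstance(schema, exp.Schema)
    #   assert all(isinstance(e, exp.ColumnDef) for e in schema.expressions)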
4435 4436 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4437 return self._parse_column_def(self._parse_field(any_token=True)) 4438 4439 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4440 # column defs are not really columns, they're identifiers 4441 if isinstance(this, exp.Column): 4442 this = this.this 4443 4444 kind = self._parse_types(schema=True) 4445 4446 if self._match_text_seq("FOR", "ORDINALITY"): 4447 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4448 4449 constraints: t.List[exp.Expression] = [] 4450 4451 if not kind and self._match(TokenType.ALIAS): 4452 constraints.append( 4453 self.expression( 4454 exp.ComputedColumnConstraint, 4455 this=self._parse_conjunction(), 4456 persisted=self._match_text_seq("PERSISTED"), 4457 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4458 ) 4459 ) 4460 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4461 self._match(TokenType.ALIAS) 4462 constraints.append( 4463 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4464 ) 4465 4466 while True: 4467 constraint = self._parse_column_constraint() 4468 if not constraint: 4469 break 4470 constraints.append(constraint) 4471 4472 if not kind and not constraints: 4473 return this 4474 4475 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4476 4477 def _parse_auto_increment( 4478 self, 4479 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4480 start = None 4481 increment = None 4482 4483 if self._match(TokenType.L_PAREN, advance=False): 4484 args = self._parse_wrapped_csv(self._parse_bitwise) 4485 start = seq_get(args, 0) 4486 increment = seq_get(args, 1) 4487 elif self._match_text_seq("START"): 4488 start = self._parse_bitwise() 4489 self._match_text_seq("INCREMENT") 4490 increment = self._parse_bitwise() 4491 4492 if start and increment: 4493 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4494 4495 return exp.AutoIncrementColumnConstraint() 4496 4497 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4498 if not self._match_text_seq("REFRESH"): 4499 self._retreat(self._index - 1) 4500 return None 4501 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4502 4503 def _parse_compress(self) -> exp.CompressColumnConstraint: 4504 if self._match(TokenType.L_PAREN, advance=False): 4505 return self.expression( 4506 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4507 ) 4508 4509 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4510 4511 def _parse_generated_as_identity( 4512 self, 4513 ) -> ( 4514 exp.GeneratedAsIdentityColumnConstraint 4515 | exp.ComputedColumnConstraint 4516 | exp.GeneratedAsRowColumnConstraint 4517 ): 4518 if self._match_text_seq("BY", "DEFAULT"): 4519 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4520 this = self.expression( 4521 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4522 ) 4523 else: 4524 self._match_text_seq("ALWAYS") 4525 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4526 4527 self._match(TokenType.ALIAS) 4528 4529 if self._match_text_seq("ROW"): 4530 start = self._match_text_seq("START") 4531 if not start: 4532 self._match(TokenType.END) 4533 hidden = self._match_text_seq("HIDDEN") 4534 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, 
hidden=hidden) 4535 4536 identity = self._match_text_seq("IDENTITY") 4537 4538 if self._match(TokenType.L_PAREN): 4539 if self._match(TokenType.START_WITH): 4540 this.set("start", self._parse_bitwise()) 4541 if self._match_text_seq("INCREMENT", "BY"): 4542 this.set("increment", self._parse_bitwise()) 4543 if self._match_text_seq("MINVALUE"): 4544 this.set("minvalue", self._parse_bitwise()) 4545 if self._match_text_seq("MAXVALUE"): 4546 this.set("maxvalue", self._parse_bitwise()) 4547 4548 if self._match_text_seq("CYCLE"): 4549 this.set("cycle", True) 4550 elif self._match_text_seq("NO", "CYCLE"): 4551 this.set("cycle", False) 4552 4553 if not identity: 4554 this.set("expression", self._parse_bitwise()) 4555 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4556 args = self._parse_csv(self._parse_bitwise) 4557 this.set("start", seq_get(args, 0)) 4558 this.set("increment", seq_get(args, 1)) 4559 4560 self._match_r_paren() 4561 4562 return this 4563 4564 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4565 self._match_text_seq("LENGTH") 4566 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4567 4568 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4569 if self._match_text_seq("NULL"): 4570 return self.expression(exp.NotNullColumnConstraint) 4571 if self._match_text_seq("CASESPECIFIC"): 4572 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4573 if self._match_text_seq("FOR", "REPLICATION"): 4574 return self.expression(exp.NotForReplicationColumnConstraint) 4575 return None 4576 4577 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4578 if self._match(TokenType.CONSTRAINT): 4579 this = self._parse_id_var() 4580 else: 4581 this = None 4582 4583 if self._match_texts(self.CONSTRAINT_PARSERS): 4584 return self.expression( 4585 exp.ColumnConstraint, 4586 this=this, 4587 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4588 ) 4589 4590 return this 4591 4592 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4593 if not self._match(TokenType.CONSTRAINT): 4594 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4595 4596 return self.expression( 4597 exp.Constraint, 4598 this=self._parse_id_var(), 4599 expressions=self._parse_unnamed_constraints(), 4600 ) 4601 4602 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4603 constraints = [] 4604 while True: 4605 constraint = self._parse_unnamed_constraint() or self._parse_function() 4606 if not constraint: 4607 break 4608 constraints.append(constraint) 4609 4610 return constraints 4611 4612 def _parse_unnamed_constraint( 4613 self, constraints: t.Optional[t.Collection[str]] = None 4614 ) -> t.Optional[exp.Expression]: 4615 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4616 constraints or self.CONSTRAINT_PARSERS 4617 ): 4618 return None 4619 4620 constraint = self._prev.text.upper() 4621 if constraint not in self.CONSTRAINT_PARSERS: 4622 self.raise_error(f"No parser found for schema constraint {constraint}.") 4623 4624 return self.CONSTRAINT_PARSERS[constraint](self) 4625 4626 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4627 self._match_text_seq("KEY") 4628 return self.expression( 4629 exp.UniqueColumnConstraint, 4630 this=self._parse_schema(self._parse_id_var(any_token=False)), 4631 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4632 on_conflict=self._parse_on_conflict(), 4633 ) 4634 
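# Illustrative example, not part of the original source: for
#   CREATE TABLE t (a INT, UNIQUE (a))
# _parse_unique runs once the UNIQUE keyword has been matched and yields an
# exp.UniqueColumnConstraint whose `this` wraps the column list (a); the
# optional USING <index_type> and ON CONFLICT clauses are captured when present.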
4635 def _parse_key_constraint_options(self) -> t.List[str]: 4636 options = [] 4637 while True: 4638 if not self._curr: 4639 break 4640 4641 if self._match(TokenType.ON): 4642 action = None 4643 on = self._advance_any() and self._prev.text 4644 4645 if self._match_text_seq("NO", "ACTION"): 4646 action = "NO ACTION" 4647 elif self._match_text_seq("CASCADE"): 4648 action = "CASCADE" 4649 elif self._match_text_seq("RESTRICT"): 4650 action = "RESTRICT" 4651 elif self._match_pair(TokenType.SET, TokenType.NULL): 4652 action = "SET NULL" 4653 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4654 action = "SET DEFAULT" 4655 else: 4656 self.raise_error("Invalid key constraint") 4657 4658 options.append(f"ON {on} {action}") 4659 elif self._match_text_seq("NOT", "ENFORCED"): 4660 options.append("NOT ENFORCED") 4661 elif self._match_text_seq("DEFERRABLE"): 4662 options.append("DEFERRABLE") 4663 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4664 options.append("INITIALLY DEFERRED") 4665 elif self._match_text_seq("NORELY"): 4666 options.append("NORELY") 4667 elif self._match_text_seq("MATCH", "FULL"): 4668 options.append("MATCH FULL") 4669 else: 4670 break 4671 4672 return options 4673 4674 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4675 if match and not self._match(TokenType.REFERENCES): 4676 return None 4677 4678 expressions = None 4679 this = self._parse_table(schema=True) 4680 options = self._parse_key_constraint_options() 4681 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4682 4683 def _parse_foreign_key(self) -> exp.ForeignKey: 4684 expressions = self._parse_wrapped_id_vars() 4685 reference = self._parse_references() 4686 options = {} 4687 4688 while self._match(TokenType.ON): 4689 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4690 self.raise_error("Expected DELETE or UPDATE") 4691 4692 kind = self._prev.text.lower() 4693 4694 if self._match_text_seq("NO", "ACTION"): 4695 action = "NO ACTION" 4696 elif self._match(TokenType.SET): 4697 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4698 action = "SET " + self._prev.text.upper() 4699 else: 4700 self._advance() 4701 action = self._prev.text.upper() 4702 4703 options[kind] = action 4704 4705 return self.expression( 4706 exp.ForeignKey, 4707 expressions=expressions, 4708 reference=reference, 4709 **options, # type: ignore 4710 ) 4711 4712 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4713 return self._parse_field() 4714 4715 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4716 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4717 self._retreat(self._index - 1) 4718 return None 4719 4720 id_vars = self._parse_wrapped_id_vars() 4721 return self.expression( 4722 exp.PeriodForSystemTimeConstraint, 4723 this=seq_get(id_vars, 0), 4724 expression=seq_get(id_vars, 1), 4725 ) 4726 4727 def _parse_primary_key( 4728 self, wrapped_optional: bool = False, in_props: bool = False 4729 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4730 desc = ( 4731 self._match_set((TokenType.ASC, TokenType.DESC)) 4732 and self._prev.token_type == TokenType.DESC 4733 ) 4734 4735 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4736 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4737 4738 expressions = self._parse_wrapped_csv( 4739 self._parse_primary_key_part, optional=wrapped_optional 4740 ) 4741 options = self._parse_key_constraint_options() 4742 return 
self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4743 4744 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4745 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4746 4747 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4748 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4749 return this 4750 4751 bracket_kind = self._prev.token_type 4752 expressions = self._parse_csv( 4753 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4754 ) 4755 4756 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4757 self.raise_error("Expected ]") 4758 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4759 self.raise_error("Expected }") 4760 4761 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4762 if bracket_kind == TokenType.L_BRACE: 4763 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4764 elif not this or this.name.upper() == "ARRAY": 4765 this = self.expression(exp.Array, expressions=expressions) 4766 else: 4767 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4768 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4769 4770 self._add_comments(this) 4771 return self._parse_bracket(this) 4772 4773 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4774 if self._match(TokenType.COLON): 4775 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4776 return this 4777 4778 def _parse_case(self) -> t.Optional[exp.Expression]: 4779 ifs = [] 4780 default = None 4781 4782 comments = self._prev_comments 4783 expression = self._parse_conjunction() 4784 4785 while self._match(TokenType.WHEN): 4786 this = self._parse_conjunction() 4787 self._match(TokenType.THEN) 4788 then = self._parse_conjunction() 4789 ifs.append(self.expression(exp.If, this=this, true=then)) 4790 4791 if self._match(TokenType.ELSE): 4792 default = self._parse_conjunction() 4793 4794 if not self._match(TokenType.END): 4795 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4796 default = exp.column("interval") 4797 else: 4798 self.raise_error("Expected END after CASE", self._prev) 4799 4800 return self.expression( 4801 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4802 ) 4803 4804 def _parse_if(self) -> t.Optional[exp.Expression]: 4805 if self._match(TokenType.L_PAREN): 4806 args = self._parse_csv(self._parse_conjunction) 4807 this = self.validate_expression(exp.If.from_arg_list(args), args) 4808 self._match_r_paren() 4809 else: 4810 index = self._index - 1 4811 4812 if self.NO_PAREN_IF_COMMANDS and index == 0: 4813 return self._parse_as_command(self._prev) 4814 4815 condition = self._parse_conjunction() 4816 4817 if not condition: 4818 self._retreat(index) 4819 return None 4820 4821 self._match(TokenType.THEN) 4822 true = self._parse_conjunction() 4823 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4824 self._match(TokenType.END) 4825 this = self.expression(exp.If, this=condition, true=true, false=false) 4826 4827 return this 4828 4829 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4830 if not self._match_text_seq("VALUE", "FOR"): 4831 self._retreat(self._index - 1) 4832 return None 4833 4834 return self.expression( 4835 exp.NextValueFor, 
4836 this=self._parse_column(), 4837 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4838 ) 4839 4840 def _parse_extract(self) -> exp.Extract: 4841 this = self._parse_function() or self._parse_var() or self._parse_type() 4842 4843 if self._match(TokenType.FROM): 4844 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4845 4846 if not self._match(TokenType.COMMA): 4847 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4848 4849 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4850 4851 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4852 this = self._parse_conjunction() 4853 4854 if not self._match(TokenType.ALIAS): 4855 if self._match(TokenType.COMMA): 4856 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4857 4858 self.raise_error("Expected AS after CAST") 4859 4860 fmt = None 4861 to = self._parse_types() 4862 4863 if self._match(TokenType.FORMAT): 4864 fmt_string = self._parse_string() 4865 fmt = self._parse_at_time_zone(fmt_string) 4866 4867 if not to: 4868 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4869 if to.this in exp.DataType.TEMPORAL_TYPES: 4870 this = self.expression( 4871 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4872 this=this, 4873 format=exp.Literal.string( 4874 format_time( 4875 fmt_string.this if fmt_string else "", 4876 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4877 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4878 ) 4879 ), 4880 ) 4881 4882 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4883 this.set("zone", fmt.args["zone"]) 4884 return this 4885 elif not to: 4886 self.raise_error("Expected TYPE after CAST") 4887 elif isinstance(to, exp.Identifier): 4888 to = exp.DataType.build(to.name, udt=True) 4889 elif to.this == exp.DataType.Type.CHAR: 4890 if self._match(TokenType.CHARACTER_SET): 4891 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4892 4893 return self.expression( 4894 exp.Cast if strict else exp.TryCast, 4895 this=this, 4896 to=to, 4897 format=fmt, 4898 safe=safe, 4899 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4900 ) 4901 4902 def _parse_string_agg(self) -> exp.Expression: 4903 if self._match(TokenType.DISTINCT): 4904 args: t.List[t.Optional[exp.Expression]] = [ 4905 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4906 ] 4907 if self._match(TokenType.COMMA): 4908 args.extend(self._parse_csv(self._parse_conjunction)) 4909 else: 4910 args = self._parse_csv(self._parse_conjunction) # type: ignore 4911 4912 index = self._index 4913 if not self._match(TokenType.R_PAREN) and args: 4914 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4915 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4916 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4917 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4918 4919 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4920 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4921 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4922 if not self._match_text_seq("WITHIN", "GROUP"): 4923 self._retreat(index) 4924 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4925 4926 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4927 order = self._parse_order(this=seq_get(args, 0)) 4928 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4929 4930 def _parse_convert( 4931 self, strict: bool, safe: t.Optional[bool] = None 4932 ) -> t.Optional[exp.Expression]: 4933 this = self._parse_bitwise() 4934 4935 if self._match(TokenType.USING): 4936 to: t.Optional[exp.Expression] = self.expression( 4937 exp.CharacterSet, this=self._parse_var() 4938 ) 4939 elif self._match(TokenType.COMMA): 4940 to = self._parse_types() 4941 else: 4942 to = None 4943 4944 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4945 4946 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4947 """ 4948 There are generally two variants of the DECODE function: 4949 4950 - DECODE(bin, charset) 4951 - DECODE(expression, search, result [, search, result] ... [, default]) 4952 4953 The second variant will always be parsed into a CASE expression. Note that NULL 4954 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4955 instead of relying on pattern matching. 4956 """ 4957 args = self._parse_csv(self._parse_conjunction) 4958 4959 if len(args) < 3: 4960 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4961 4962 expression, *expressions = args 4963 if not expression: 4964 return None 4965 4966 ifs = [] 4967 for search, result in zip(expressions[::2], expressions[1::2]): 4968 if not search or not result: 4969 return None 4970 4971 if isinstance(search, exp.Literal): 4972 ifs.append( 4973 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4974 ) 4975 elif isinstance(search, exp.Null): 4976 ifs.append( 4977 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4978 ) 4979 else: 4980 cond = exp.or_( 4981 exp.EQ(this=expression.copy(), expression=search), 4982 exp.and_( 4983 exp.Is(this=expression.copy(), expression=exp.Null()), 4984 exp.Is(this=search.copy(), expression=exp.Null()), 4985 copy=False, 4986 ), 4987 copy=False, 4988 ) 4989 ifs.append(exp.If(this=cond, true=result)) 4990 4991 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4992 4993 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4994 self._match_text_seq("KEY") 4995 key = self._parse_column() 4996 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4997 self._match_text_seq("VALUE") 4998 value = self._parse_bitwise() 4999 5000 if not key and not value: 5001 return None 5002 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5003 5004 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5005 if not this or not self._match_text_seq("FORMAT", "JSON"): 5006 return this 5007 5008 return self.expression(exp.FormatJson, this=this) 5009 5010 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5011 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5012 for value in values: 5013 if self._match_text_seq(value, "ON", on): 5014 return f"{value} ON {on}" 5015 5016 return None 5017 5018 @t.overload 5019 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 
5020 5021 @t.overload 5022 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5023 5024 def _parse_json_object(self, agg=False): 5025 star = self._parse_star() 5026 expressions = ( 5027 [star] 5028 if star 5029 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5030 ) 5031 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5032 5033 unique_keys = None 5034 if self._match_text_seq("WITH", "UNIQUE"): 5035 unique_keys = True 5036 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5037 unique_keys = False 5038 5039 self._match_text_seq("KEYS") 5040 5041 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5042 self._parse_type() 5043 ) 5044 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5045 5046 return self.expression( 5047 exp.JSONObjectAgg if agg else exp.JSONObject, 5048 expressions=expressions, 5049 null_handling=null_handling, 5050 unique_keys=unique_keys, 5051 return_type=return_type, 5052 encoding=encoding, 5053 ) 5054 5055 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5056 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5057 if not self._match_text_seq("NESTED"): 5058 this = self._parse_id_var() 5059 kind = self._parse_types(allow_identifiers=False) 5060 nested = None 5061 else: 5062 this = None 5063 kind = None 5064 nested = True 5065 5066 path = self._match_text_seq("PATH") and self._parse_string() 5067 nested_schema = nested and self._parse_json_schema() 5068 5069 return self.expression( 5070 exp.JSONColumnDef, 5071 this=this, 5072 kind=kind, 5073 path=path, 5074 nested_schema=nested_schema, 5075 ) 5076 5077 def _parse_json_schema(self) -> exp.JSONSchema: 5078 self._match_text_seq("COLUMNS") 5079 return self.expression( 5080 exp.JSONSchema, 5081 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5082 ) 5083 5084 def _parse_json_table(self) -> exp.JSONTable: 5085 this = self._parse_format_json(self._parse_bitwise()) 5086 path = self._match(TokenType.COMMA) and self._parse_string() 5087 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5088 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5089 schema = self._parse_json_schema() 5090 5091 return exp.JSONTable( 5092 this=this, 5093 schema=schema, 5094 path=path, 5095 error_handling=error_handling, 5096 empty_handling=empty_handling, 5097 ) 5098 5099 def _parse_match_against(self) -> exp.MatchAgainst: 5100 expressions = self._parse_csv(self._parse_column) 5101 5102 self._match_text_seq(")", "AGAINST", "(") 5103 5104 this = self._parse_string() 5105 5106 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5107 modifier = "IN NATURAL LANGUAGE MODE" 5108 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5109 modifier = f"{modifier} WITH QUERY EXPANSION" 5110 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5111 modifier = "IN BOOLEAN MODE" 5112 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5113 modifier = "WITH QUERY EXPANSION" 5114 else: 5115 modifier = None 5116 5117 return self.expression( 5118 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5119 ) 5120 5121 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5122 def _parse_open_json(self) -> exp.OpenJSON: 5123 this = self._parse_bitwise() 5124 path = self._match(TokenType.COMMA) and self._parse_string() 5125 5126 def _parse_open_json_column_def() -> 
exp.OpenJSONColumnDef: 5127 this = self._parse_field(any_token=True) 5128 kind = self._parse_types() 5129 path = self._parse_string() 5130 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5131 5132 return self.expression( 5133 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5134 ) 5135 5136 expressions = None 5137 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5138 self._match_l_paren() 5139 expressions = self._parse_csv(_parse_open_json_column_def) 5140 5141 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5142 5143 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5144 args = self._parse_csv(self._parse_bitwise) 5145 5146 if self._match(TokenType.IN): 5147 return self.expression( 5148 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5149 ) 5150 5151 if haystack_first: 5152 haystack = seq_get(args, 0) 5153 needle = seq_get(args, 1) 5154 else: 5155 needle = seq_get(args, 0) 5156 haystack = seq_get(args, 1) 5157 5158 return self.expression( 5159 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5160 ) 5161 5162 def _parse_predict(self) -> exp.Predict: 5163 self._match_text_seq("MODEL") 5164 this = self._parse_table() 5165 5166 self._match(TokenType.COMMA) 5167 self._match_text_seq("TABLE") 5168 5169 return self.expression( 5170 exp.Predict, 5171 this=this, 5172 expression=self._parse_table(), 5173 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5174 ) 5175 5176 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5177 args = self._parse_csv(self._parse_table) 5178 return exp.JoinHint(this=func_name.upper(), expressions=args) 5179 5180 def _parse_substring(self) -> exp.Substring: 5181 # Postgres supports the form: substring(string [from int] [for int]) 5182 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5183 5184 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5185 5186 if self._match(TokenType.FROM): 5187 args.append(self._parse_bitwise()) 5188 if self._match(TokenType.FOR): 5189 args.append(self._parse_bitwise()) 5190 5191 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5192 5193 def _parse_trim(self) -> exp.Trim: 5194 # https://www.w3resource.com/sql/character-functions/trim.php 5195 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5196 5197 position = None 5198 collation = None 5199 expression = None 5200 5201 if self._match_texts(self.TRIM_TYPES): 5202 position = self._prev.text.upper() 5203 5204 this = self._parse_bitwise() 5205 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5206 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5207 expression = self._parse_bitwise() 5208 5209 if invert_order: 5210 this, expression = expression, this 5211 5212 if self._match(TokenType.COLLATE): 5213 collation = self._parse_bitwise() 5214 5215 return self.expression( 5216 exp.Trim, this=this, position=position, expression=expression, collation=collation 5217 ) 5218 5219 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5220 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5221 5222 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5223 return self._parse_window(self._parse_id_var(), alias=True) 5224 5225 def _parse_respect_or_ignore_nulls( 5226 self, this: t.Optional[exp.Expression] 5227 ) -> t.Optional[exp.Expression]: 
5228 if self._match_text_seq("IGNORE", "NULLS"): 5229 return self.expression(exp.IgnoreNulls, this=this) 5230 if self._match_text_seq("RESPECT", "NULLS"): 5231 return self.expression(exp.RespectNulls, this=this) 5232 return this 5233 5234 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5235 if self._match(TokenType.HAVING): 5236 self._match_texts(("MAX", "MIN")) 5237 max = self._prev.text.upper() != "MIN" 5238 return self.expression( 5239 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5240 ) 5241 5242 return this 5243 5244 def _parse_window( 5245 self, this: t.Optional[exp.Expression], alias: bool = False 5246 ) -> t.Optional[exp.Expression]: 5247 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5248 self._match(TokenType.WHERE) 5249 this = self.expression( 5250 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5251 ) 5252 self._match_r_paren() 5253 5254 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5255 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5256 if self._match_text_seq("WITHIN", "GROUP"): 5257 order = self._parse_wrapped(self._parse_order) 5258 this = self.expression(exp.WithinGroup, this=this, expression=order) 5259 5260 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5261 # Some dialects choose to implement and some do not. 5262 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5263 5264 # There is some code above in _parse_lambda that handles 5265 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5266 5267 # The below changes handle 5268 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5269 5270 # Oracle allows both formats 5271 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5272 # and Snowflake chose to do the same for familiarity 5273 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5274 if isinstance(this, exp.AggFunc): 5275 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5276 5277 if ignore_respect and ignore_respect is not this: 5278 ignore_respect.replace(ignore_respect.this) 5279 this = self.expression(ignore_respect.__class__, this=this) 5280 5281 this = self._parse_respect_or_ignore_nulls(this) 5282 5283 # bigquery select from window x AS (partition by ...) 
5284 if alias: 5285 over = None 5286 self._match(TokenType.ALIAS) 5287 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5288 return this 5289 else: 5290 over = self._prev.text.upper() 5291 5292 if not self._match(TokenType.L_PAREN): 5293 return self.expression( 5294 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5295 ) 5296 5297 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5298 5299 first = self._match(TokenType.FIRST) 5300 if self._match_text_seq("LAST"): 5301 first = False 5302 5303 partition, order = self._parse_partition_and_order() 5304 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5305 5306 if kind: 5307 self._match(TokenType.BETWEEN) 5308 start = self._parse_window_spec() 5309 self._match(TokenType.AND) 5310 end = self._parse_window_spec() 5311 5312 spec = self.expression( 5313 exp.WindowSpec, 5314 kind=kind, 5315 start=start["value"], 5316 start_side=start["side"], 5317 end=end["value"], 5318 end_side=end["side"], 5319 ) 5320 else: 5321 spec = None 5322 5323 self._match_r_paren() 5324 5325 window = self.expression( 5326 exp.Window, 5327 this=this, 5328 partition_by=partition, 5329 order=order, 5330 spec=spec, 5331 alias=window_alias, 5332 over=over, 5333 first=first, 5334 ) 5335 5336 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5337 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5338 return self._parse_window(window, alias=alias) 5339 5340 return window 5341 5342 def _parse_partition_and_order( 5343 self, 5344 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5345 return self._parse_partition_by(), self._parse_order() 5346 5347 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5348 self._match(TokenType.BETWEEN) 5349 5350 return { 5351 "value": ( 5352 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5353 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5354 or self._parse_bitwise() 5355 ), 5356 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5357 } 5358 5359 def _parse_alias( 5360 self, this: t.Optional[exp.Expression], explicit: bool = False 5361 ) -> t.Optional[exp.Expression]: 5362 any_token = self._match(TokenType.ALIAS) 5363 comments = self._prev_comments 5364 5365 if explicit and not any_token: 5366 return this 5367 5368 if self._match(TokenType.L_PAREN): 5369 aliases = self.expression( 5370 exp.Aliases, 5371 comments=comments, 5372 this=this, 5373 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5374 ) 5375 self._match_r_paren(aliases) 5376 return aliases 5377 5378 alias = self._parse_id_var(any_token) or ( 5379 self.STRING_ALIASES and self._parse_string_as_identifier() 5380 ) 5381 5382 if alias: 5383 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5384 column = this.this 5385 5386 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5387 if not this.comments and column and column.comments: 5388 this.comments = column.comments 5389 column.comments = None 5390 5391 return this 5392 5393 def _parse_id_var( 5394 self, 5395 any_token: bool = True, 5396 tokens: t.Optional[t.Collection[TokenType]] = None, 5397 ) -> t.Optional[exp.Expression]: 5398 identifier = self._parse_identifier() 5399 5400 if identifier: 5401 return identifier 5402 5403 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5404 quoted = self._prev.token_type == TokenType.STRING 5405 
return exp.Identifier(this=self._prev.text, quoted=quoted) 5406 5407 return None 5408 5409 def _parse_string(self) -> t.Optional[exp.Expression]: 5410 if self._match_set(self.STRING_PARSERS): 5411 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5412 return self._parse_placeholder() 5413 5414 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5415 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5416 5417 def _parse_number(self) -> t.Optional[exp.Expression]: 5418 if self._match_set(self.NUMERIC_PARSERS): 5419 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5420 return self._parse_placeholder() 5421 5422 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5423 if self._match(TokenType.IDENTIFIER): 5424 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5425 return self._parse_placeholder() 5426 5427 def _parse_var( 5428 self, 5429 any_token: bool = False, 5430 tokens: t.Optional[t.Collection[TokenType]] = None, 5431 upper: bool = False, 5432 ) -> t.Optional[exp.Expression]: 5433 if ( 5434 (any_token and self._advance_any()) 5435 or self._match(TokenType.VAR) 5436 or (self._match_set(tokens) if tokens else False) 5437 ): 5438 return self.expression( 5439 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5440 ) 5441 return self._parse_placeholder() 5442 5443 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5444 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5445 self._advance() 5446 return self._prev 5447 return None 5448 5449 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5450 return self._parse_var() or self._parse_string() 5451 5452 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5453 return self._parse_primary() or self._parse_var(any_token=True) 5454 5455 def _parse_null(self) -> t.Optional[exp.Expression]: 5456 if self._match_set(self.NULL_TOKENS): 5457 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5458 return self._parse_placeholder() 5459 5460 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5461 if self._match(TokenType.TRUE): 5462 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5463 if self._match(TokenType.FALSE): 5464 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5465 return self._parse_placeholder() 5466 5467 def _parse_star(self) -> t.Optional[exp.Expression]: 5468 if self._match(TokenType.STAR): 5469 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5470 return self._parse_placeholder() 5471 5472 def _parse_parameter(self) -> exp.Parameter: 5473 self._match(TokenType.L_BRACE) 5474 this = self._parse_identifier() or self._parse_primary_or_var() 5475 expression = self._match(TokenType.COLON) and ( 5476 self._parse_identifier() or self._parse_primary_or_var() 5477 ) 5478 self._match(TokenType.R_BRACE) 5479 return self.expression(exp.Parameter, this=this, expression=expression) 5480 5481 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5482 if self._match_set(self.PLACEHOLDER_PARSERS): 5483 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5484 if placeholder: 5485 return placeholder 5486 self._advance(-1) 5487 return None 5488 5489 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5490 if not self._match(TokenType.EXCEPT): 5491 return None 5492 if self._match(TokenType.L_PAREN, advance=False): 5493 return 
self._parse_wrapped_csv(self._parse_column) 5494 5495 except_column = self._parse_column() 5496 return [except_column] if except_column else None 5497 5498 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5499 if not self._match(TokenType.REPLACE): 5500 return None 5501 if self._match(TokenType.L_PAREN, advance=False): 5502 return self._parse_wrapped_csv(self._parse_expression) 5503 5504 replace_expression = self._parse_expression() 5505 return [replace_expression] if replace_expression else None 5506 5507 def _parse_csv( 5508 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5509 ) -> t.List[exp.Expression]: 5510 parse_result = parse_method() 5511 items = [parse_result] if parse_result is not None else [] 5512 5513 while self._match(sep): 5514 self._add_comments(parse_result) 5515 parse_result = parse_method() 5516 if parse_result is not None: 5517 items.append(parse_result) 5518 5519 return items 5520 5521 def _parse_tokens( 5522 self, parse_method: t.Callable, expressions: t.Dict 5523 ) -> t.Optional[exp.Expression]: 5524 this = parse_method() 5525 5526 while self._match_set(expressions): 5527 this = self.expression( 5528 expressions[self._prev.token_type], 5529 this=this, 5530 comments=self._prev_comments, 5531 expression=parse_method(), 5532 ) 5533 5534 return this 5535 5536 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5537 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5538 5539 def _parse_wrapped_csv( 5540 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5541 ) -> t.List[exp.Expression]: 5542 return self._parse_wrapped( 5543 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5544 ) 5545 5546 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5547 wrapped = self._match(TokenType.L_PAREN) 5548 if not wrapped and not optional: 5549 self.raise_error("Expecting (") 5550 parse_result = parse_method() 5551 if wrapped: 5552 self._match_r_paren() 5553 return parse_result 5554 5555 def _parse_expressions(self) -> t.List[exp.Expression]: 5556 return self._parse_csv(self._parse_expression) 5557 5558 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5559 return self._parse_select() or self._parse_set_operations( 5560 self._parse_expression() if alias else self._parse_conjunction() 5561 ) 5562 5563 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5564 return self._parse_query_modifiers( 5565 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5566 ) 5567 5568 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5569 this = None 5570 if self._match_texts(self.TRANSACTION_KIND): 5571 this = self._prev.text 5572 5573 self._match_texts(("TRANSACTION", "WORK")) 5574 5575 modes = [] 5576 while True: 5577 mode = [] 5578 while self._match(TokenType.VAR): 5579 mode.append(self._prev.text) 5580 5581 if mode: 5582 modes.append(" ".join(mode)) 5583 if not self._match(TokenType.COMMA): 5584 break 5585 5586 return self.expression(exp.Transaction, this=this, modes=modes) 5587 5588 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5589 chain = None 5590 savepoint = None 5591 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5592 5593 self._match_texts(("TRANSACTION", "WORK")) 5594 5595 if self._match_text_seq("TO"): 5596 self._match_text_seq("SAVEPOINT") 5597 savepoint = self._parse_id_var() 5598 5599 if 
self._match(TokenType.AND): 5600 chain = not self._match_text_seq("NO") 5601 self._match_text_seq("CHAIN") 5602 5603 if is_rollback: 5604 return self.expression(exp.Rollback, savepoint=savepoint) 5605 5606 return self.expression(exp.Commit, chain=chain) 5607 5608 def _parse_refresh(self) -> exp.Refresh: 5609 self._match(TokenType.TABLE) 5610 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5611 5612 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5613 if not self._match_text_seq("ADD"): 5614 return None 5615 5616 self._match(TokenType.COLUMN) 5617 exists_column = self._parse_exists(not_=True) 5618 expression = self._parse_field_def() 5619 5620 if expression: 5621 expression.set("exists", exists_column) 5622 5623 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5624 if self._match_texts(("FIRST", "AFTER")): 5625 position = self._prev.text 5626 column_position = self.expression( 5627 exp.ColumnPosition, this=self._parse_column(), position=position 5628 ) 5629 expression.set("position", column_position) 5630 5631 return expression 5632 5633 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5634 drop = self._match(TokenType.DROP) and self._parse_drop() 5635 if drop and not isinstance(drop, exp.Command): 5636 drop.set("kind", drop.args.get("kind", "COLUMN")) 5637 return drop 5638 5639 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5640 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5641 return self.expression( 5642 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5643 ) 5644 5645 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5646 index = self._index - 1 5647 5648 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5649 return self._parse_csv( 5650 lambda: self.expression( 5651 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5652 ) 5653 ) 5654 5655 self._retreat(index) 5656 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5657 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5658 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5659 5660 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5661 self._match(TokenType.COLUMN) 5662 column = self._parse_field(any_token=True) 5663 5664 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5665 return self.expression(exp.AlterColumn, this=column, drop=True) 5666 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5667 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5668 if self._match(TokenType.COMMENT): 5669 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5670 5671 self._match_text_seq("SET", "DATA") 5672 return self.expression( 5673 exp.AlterColumn, 5674 this=column, 5675 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5676 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5677 using=self._match(TokenType.USING) and self._parse_conjunction(), 5678 ) 5679 5680 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5681 index = self._index - 1 5682 5683 partition_exists = self._parse_exists() 5684 if self._match(TokenType.PARTITION, advance=False): 5685 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5686 5687 self._retreat(index) 5688 return 
self._parse_csv(self._parse_drop_column) 5689 5690 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5691 if self._match(TokenType.COLUMN): 5692 exists = self._parse_exists() 5693 old_column = self._parse_column() 5694 to = self._match_text_seq("TO") 5695 new_column = self._parse_column() 5696 5697 if old_column is None or to is None or new_column is None: 5698 return None 5699 5700 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5701 5702 self._match_text_seq("TO") 5703 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5704 5705 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5706 start = self._prev 5707 5708 if not self._match(TokenType.TABLE): 5709 return self._parse_as_command(start) 5710 5711 exists = self._parse_exists() 5712 only = self._match_text_seq("ONLY") 5713 this = self._parse_table(schema=True) 5714 5715 if self._next: 5716 self._advance() 5717 5718 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5719 if parser: 5720 actions = ensure_list(parser(self)) 5721 options = self._parse_csv(self._parse_property) 5722 5723 if not self._curr and actions: 5724 return self.expression( 5725 exp.AlterTable, 5726 this=this, 5727 exists=exists, 5728 actions=actions, 5729 only=only, 5730 options=options, 5731 ) 5732 5733 return self._parse_as_command(start) 5734 5735 def _parse_merge(self) -> exp.Merge: 5736 self._match(TokenType.INTO) 5737 target = self._parse_table() 5738 5739 if target and self._match(TokenType.ALIAS, advance=False): 5740 target.set("alias", self._parse_table_alias()) 5741 5742 self._match(TokenType.USING) 5743 using = self._parse_table() 5744 5745 self._match(TokenType.ON) 5746 on = self._parse_conjunction() 5747 5748 return self.expression( 5749 exp.Merge, 5750 this=target, 5751 using=using, 5752 on=on, 5753 expressions=self._parse_when_matched(), 5754 ) 5755 5756 def _parse_when_matched(self) -> t.List[exp.When]: 5757 whens = [] 5758 5759 while self._match(TokenType.WHEN): 5760 matched = not self._match(TokenType.NOT) 5761 self._match_text_seq("MATCHED") 5762 source = ( 5763 False 5764 if self._match_text_seq("BY", "TARGET") 5765 else self._match_text_seq("BY", "SOURCE") 5766 ) 5767 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5768 5769 self._match(TokenType.THEN) 5770 5771 if self._match(TokenType.INSERT): 5772 _this = self._parse_star() 5773 if _this: 5774 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5775 else: 5776 then = self.expression( 5777 exp.Insert, 5778 this=self._parse_value(), 5779 expression=self._match_text_seq("VALUES") and self._parse_value(), 5780 ) 5781 elif self._match(TokenType.UPDATE): 5782 expressions = self._parse_star() 5783 if expressions: 5784 then = self.expression(exp.Update, expressions=expressions) 5785 else: 5786 then = self.expression( 5787 exp.Update, 5788 expressions=self._match(TokenType.SET) 5789 and self._parse_csv(self._parse_equality), 5790 ) 5791 elif self._match(TokenType.DELETE): 5792 then = self.expression(exp.Var, this=self._prev.text) 5793 else: 5794 then = None 5795 5796 whens.append( 5797 self.expression( 5798 exp.When, 5799 matched=matched, 5800 source=source, 5801 condition=condition, 5802 then=then, 5803 ) 5804 ) 5805 return whens 5806 5807 def _parse_show(self) -> t.Optional[exp.Expression]: 5808 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5809 if parser: 5810 return parser(self) 5811 return 
self._parse_as_command(self._prev) 5812 5813 def _parse_set_item_assignment( 5814 self, kind: t.Optional[str] = None 5815 ) -> t.Optional[exp.Expression]: 5816 index = self._index 5817 5818 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5819 return self._parse_set_transaction(global_=kind == "GLOBAL") 5820 5821 left = self._parse_primary() or self._parse_id_var() 5822 assignment_delimiter = self._match_texts(("=", "TO")) 5823 5824 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5825 self._retreat(index) 5826 return None 5827 5828 right = self._parse_statement() or self._parse_id_var() 5829 this = self.expression(exp.EQ, this=left, expression=right) 5830 5831 return self.expression(exp.SetItem, this=this, kind=kind) 5832 5833 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5834 self._match_text_seq("TRANSACTION") 5835 characteristics = self._parse_csv( 5836 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5837 ) 5838 return self.expression( 5839 exp.SetItem, 5840 expressions=characteristics, 5841 kind="TRANSACTION", 5842 **{"global": global_}, # type: ignore 5843 ) 5844 5845 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5846 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5847 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5848 5849 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5850 index = self._index 5851 set_ = self.expression( 5852 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5853 ) 5854 5855 if self._curr: 5856 self._retreat(index) 5857 return self._parse_as_command(self._prev) 5858 5859 return set_ 5860 5861 def _parse_var_from_options( 5862 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5863 ) -> t.Optional[exp.Var]: 5864 start = self._curr 5865 if not start: 5866 return None 5867 5868 option = start.text.upper() 5869 continuations = options.get(option) 5870 5871 index = self._index 5872 self._advance() 5873 for keywords in continuations or []: 5874 if isinstance(keywords, str): 5875 keywords = (keywords,) 5876 5877 if self._match_text_seq(*keywords): 5878 option = f"{option} {' '.join(keywords)}" 5879 break 5880 else: 5881 if continuations or continuations is None: 5882 if raise_unmatched: 5883 self.raise_error(f"Unknown option {option}") 5884 5885 self._retreat(index) 5886 return None 5887 5888 return exp.var(option) 5889 5890 def _parse_as_command(self, start: Token) -> exp.Command: 5891 while self._curr: 5892 self._advance() 5893 text = self._find_sql(start, self._prev) 5894 size = len(start.text) 5895 self._warn_unsupported() 5896 return exp.Command(this=text[:size], expression=text[size:]) 5897 5898 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5899 settings = [] 5900 5901 self._match_l_paren() 5902 kind = self._parse_id_var() 5903 5904 if self._match(TokenType.L_PAREN): 5905 while True: 5906 key = self._parse_id_var() 5907 value = self._parse_primary() 5908 5909 if not key and value is None: 5910 break 5911 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5912 self._match(TokenType.R_PAREN) 5913 5914 self._match_r_paren() 5915 5916 return self.expression( 5917 exp.DictProperty, 5918 this=this, 5919 kind=kind.this if kind else None, 5920 settings=settings, 5921 ) 5922 5923 def _parse_dict_range(self, this: str) -> exp.DictRange: 5924 self._match_l_paren() 5925 has_min = 
self._match_text_seq("MIN") 5926 if has_min: 5927 min = self._parse_var() or self._parse_primary() 5928 self._match_text_seq("MAX") 5929 max = self._parse_var() or self._parse_primary() 5930 else: 5931 max = self._parse_var() or self._parse_primary() 5932 min = exp.Literal.number(0) 5933 self._match_r_paren() 5934 return self.expression(exp.DictRange, this=this, min=min, max=max) 5935 5936 def _parse_comprehension( 5937 self, this: t.Optional[exp.Expression] 5938 ) -> t.Optional[exp.Comprehension]: 5939 index = self._index 5940 expression = self._parse_column() 5941 if not self._match(TokenType.IN): 5942 self._retreat(index - 1) 5943 return None 5944 iterator = self._parse_column() 5945 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5946 return self.expression( 5947 exp.Comprehension, 5948 this=this, 5949 expression=expression, 5950 iterator=iterator, 5951 condition=condition, 5952 ) 5953 5954 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5955 if self._match(TokenType.HEREDOC_STRING): 5956 return self.expression(exp.Heredoc, this=self._prev.text) 5957 5958 if not self._match_text_seq("$"): 5959 return None 5960 5961 tags = ["$"] 5962 tag_text = None 5963 5964 if self._is_connected(): 5965 self._advance() 5966 tags.append(self._prev.text.upper()) 5967 else: 5968 self.raise_error("No closing $ found") 5969 5970 if tags[-1] != "$": 5971 if self._is_connected() and self._match_text_seq("$"): 5972 tag_text = tags[-1] 5973 tags.append("$") 5974 else: 5975 self.raise_error("No closing $ found") 5976 5977 heredoc_start = self._curr 5978 5979 while self._curr: 5980 if self._match_text_seq(*tags, advance=False): 5981 this = self._find_sql(heredoc_start, self._prev) 5982 self._advance(len(tags)) 5983 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5984 5985 self._advance() 5986 5987 self.raise_error(f"No closing {''.join(tags)} found") 5988 return None 5989 5990 def _find_parser( 5991 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5992 ) -> t.Optional[t.Callable]: 5993 if not self._curr: 5994 return None 5995 5996 index = self._index 5997 this = [] 5998 while True: 5999 # The current token might be multiple words 6000 curr = self._curr.text.upper() 6001 key = curr.split(" ") 6002 this.append(curr) 6003 6004 self._advance() 6005 result, trie = in_trie(trie, key) 6006 if result == TrieResult.FAILED: 6007 break 6008 6009 if result == TrieResult.EXISTS: 6010 subparser = parsers[" ".join(this)] 6011 return subparser 6012 6013 self._retreat(index) 6014 return None 6015 6016 def _match(self, token_type, advance=True, expression=None): 6017 if not self._curr: 6018 return None 6019 6020 if self._curr.token_type == token_type: 6021 if advance: 6022 self._advance() 6023 self._add_comments(expression) 6024 return True 6025 6026 return None 6027 6028 def _match_set(self, types, advance=True): 6029 if not self._curr: 6030 return None 6031 6032 if self._curr.token_type in types: 6033 if advance: 6034 self._advance() 6035 return True 6036 6037 return None 6038 6039 def _match_pair(self, token_type_a, token_type_b, advance=True): 6040 if not self._curr or not self._next: 6041 return None 6042 6043 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6044 if advance: 6045 self._advance(2) 6046 return True 6047 6048 return None 6049 6050 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6051 if not self._match(TokenType.L_PAREN, expression=expression): 6052 self.raise_error("Expecting (") 6053 6054 def 
_match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6055 if not self._match(TokenType.R_PAREN, expression=expression): 6056 self.raise_error("Expecting )") 6057 6058 def _match_texts(self, texts, advance=True): 6059 if self._curr and self._curr.text.upper() in texts: 6060 if advance: 6061 self._advance() 6062 return True 6063 return None 6064 6065 def _match_text_seq(self, *texts, advance=True): 6066 index = self._index 6067 for text in texts: 6068 if self._curr and self._curr.text.upper() == text: 6069 self._advance() 6070 else: 6071 self._retreat(index) 6072 return None 6073 6074 if not advance: 6075 self._retreat(index) 6076 6077 return True 6078 6079 def _replace_lambda( 6080 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6081 ) -> t.Optional[exp.Expression]: 6082 if not node: 6083 return node 6084 6085 for column in node.find_all(exp.Column): 6086 if column.parts[0].name in lambda_variables: 6087 dot_or_id = column.to_dot() if column.table else column.this 6088 parent = column.parent 6089 6090 while isinstance(parent, exp.Dot): 6091 if not isinstance(parent.parent, exp.Dot): 6092 parent.replace(dot_or_id) 6093 break 6094 parent = parent.parent 6095 else: 6096 if column is node: 6097 node = dot_or_id 6098 else: 6099 column.replace(dot_or_id) 6100 return node 6101 6102 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6103 start = self._prev 6104 6105 # Not to be confused with TRUNCATE(number, decimals) function call 6106 if self._match(TokenType.L_PAREN): 6107 self._retreat(self._index - 2) 6108 return self._parse_function() 6109 6110 # Clickhouse supports TRUNCATE DATABASE as well 6111 is_database = self._match(TokenType.DATABASE) 6112 6113 self._match(TokenType.TABLE) 6114 6115 exists = self._parse_exists(not_=False) 6116 6117 expressions = self._parse_csv( 6118 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6119 ) 6120 6121 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6122 6123 if self._match_text_seq("RESTART", "IDENTITY"): 6124 identity = "RESTART" 6125 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6126 identity = "CONTINUE" 6127 else: 6128 identity = None 6129 6130 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6131 option = self._prev.text 6132 else: 6133 option = None 6134 6135 partition = self._parse_partition() 6136 6137 # Fallback case 6138 if self._curr: 6139 return self._parse_as_command(start) 6140 6141 return self.expression( 6142 exp.TruncateTable, 6143 expressions=expressions, 6144 is_database=is_database, 6145 exists=exists, 6146 cluster=cluster, 6147 identity=identity, 6148 option=option, 6149 partition=partition, 6150 ) 6151 6152 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6153 this = self._parse_ordered(self._parse_opclass) 6154 6155 if not self._match(TokenType.WITH): 6156 return this 6157 6158 op = self._parse_var(any_token=True) 6159 6160 return self.expression(exp.WithOperator, this=this, op=op)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
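A minimal usage sketch of the constructor, assuming the default dialect (the SQL string is illustrative):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)

    # Collect up to 5 error messages and raise them together, instead of
    # failing fast on the first error as ErrorLevel.IMMEDIATE would.
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    expressions = parser.parse(tokens, sql)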
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
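For example, a short sketch (illustrative SQL) showing one tree per statement:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql)
    assert len(trees) == 2  # one syntax tree per semicolon-separated statement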
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
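A minimal sketch of this, assuming exp.Select is registered in EXPRESSION_PARSERS (the SQL string is illustrative):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)

    # Succeeds: the tokens parse as a SELECT statement.
    select = Parser().parse_into(exp.Select, tokens, sql)[0]

    # Given a collection, each type is tried in order until one succeeds; if all
    # fail, each ParseError is tagged with its "into_expression" and merged into
    # a single ParseError.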
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
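Under ErrorLevel.WARN the parser accumulates errors and this method logs them instead of raising; a sketch (the malformed SQL is illustrative):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT (1"  # unbalanced paren, malformed on purpose
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql)  # _parse calls check_errors internally
    print(parser.errors)  # recorded ParseError instances, logged rather than raised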
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
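Dialect parsers call this to report a problem at the current token; a hedged sketch of a hypothetical subclass (MyParser and the FORBIDDEN check are illustrative, not part of sqlglot):

    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_statement(self):
            if self._curr and self._curr.text.upper() == "FORBIDDEN":
                # Records the error, or raises immediately under ErrorLevel.IMMEDIATE.
                self.raise_error("FORBIDDEN is not supported")
            return super()._parse_statement()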
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
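A sketch of how a custom parse method might use this helper (exp.Anonymous is real; the _parse_my_hint method is a hypothetical extension):

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_my_hint(self):
            # Builds the node, attaches pending comments, and validates it in one step.
            return self.expression(exp.Anonymous, this="MY_HINT", expressions=[])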
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
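For instance, a minimal sketch: an exp.Cast missing its mandatory `to` argument should fail validation unless the error level is ErrorLevel.IGNORE (the column name is illustrative):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    bad_cast = exp.Cast(this=exp.column("x"))  # missing the required `to` type

    # Under the default ErrorLevel.IMMEDIATE, validation raises a ParseError;
    # under ErrorLevel.IGNORE, the node is returned unchecked.
    Parser(error_level=ErrorLevel.IGNORE).validate_expression(bad_cast)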