sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
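
# A minimal usage sketch for the builder helpers above (illustrative; the parser
# normally invokes them through the FUNCTIONS table defined on Parser below):
#
#     >>> node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     >>> isinstance(node, exp.VarMap)
#     True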

def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
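
# Illustrative sketch of `build_mod`'s operand wrapping; the rendered SQL assumes
# the default dialect, which emits MOD as the `%` operator:
#
#     >>> add = exp.Add(this=exp.column("a"), expression=exp.Literal.number(1))
#     >>> build_mod([add, exp.Literal.number(7)]).sql()
#     '(a + 1) % 7'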

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "MOD": build_mod,
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
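
    # Usage sketch (illustrative): FUNCTIONS maps an upper-cased function name to a
    # builder, e.g. `build_upper` collapses UPPER(HEX(..)) into a single Hex node:
    #
    #     >>> Parser.FUNCTIONS["UPPER"]([exp.Hex(this=exp.column("x"))]).sql()
    #     'HEX(x)'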

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }
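
    # Usage sketch (illustrative): the CONJUNCTION/DISJUNCTION/EQUALITY maps drive
    # binary operator parsing, assuming the top-level sqlglot API is available:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("a = 1 AND b <> 2")).__name__
    #     'And'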

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
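
    # Usage sketch (illustrative): COLUMN_OPERATORS handles postfix column operators
    # such as `::` casts and the JSON arrows:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("x::INT").sql()
    #     'CAST(x AS INT)'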

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }
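
    # Usage sketch (illustrative): EXPRESSION_PARSERS backs `parse_into`, which the
    # top-level API exposes through the `into` argument:
    #
    #     >>> import sqlglot
    #     >>> isinstance(sqlglot.parse_one("SELECT 1", into=exp.Select), exp.Select)
    #     True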

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
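
    # Usage sketch (illustrative): RANGE_PARSERS covers postfix predicates such as
    # BETWEEN, IN and IS:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("x BETWEEN 1 AND 2")).__name__
    #     'Between'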
"DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 873 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 874 "DEFINER": lambda self: self._parse_definer(), 875 "DETERMINISTIC": lambda self: self.expression( 876 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 877 ), 878 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 879 "DISTKEY": lambda self: self._parse_distkey(), 880 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 881 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 882 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 883 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 884 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 885 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 886 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 887 "FREESPACE": lambda self: self._parse_freespace(), 888 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 889 "HEAP": lambda self: self.expression(exp.HeapProperty), 890 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 891 "IMMUTABLE": lambda self: self.expression( 892 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 893 ), 894 "INHERITS": lambda self: self.expression( 895 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 896 ), 897 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 898 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 899 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 900 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 901 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 902 "LIKE": lambda self: self._parse_create_like(), 903 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 904 "LOCK": lambda self: self._parse_locking(), 905 "LOCKING": lambda self: self._parse_locking(), 906 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 907 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 908 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 909 "MODIFIES": lambda self: self._parse_modifies_property(), 910 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 911 "NO": lambda self: self._parse_no_property(), 912 "ON": lambda self: self._parse_on_property(), 913 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 914 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 915 "PARTITION": lambda self: self._parse_partitioned_of(), 916 "PARTITION BY": lambda self: self._parse_partitioned_by(), 917 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 918 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 919 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 920 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 921 "READS": lambda self: self._parse_reads_property(), 922 "REMOTE": lambda self: self._parse_remote_with_connection(), 923 "RETURNS": lambda self: self._parse_returns(), 924 "STRICT": lambda self: self.expression(exp.StrictProperty), 925 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 926 "ROW": lambda self: self._parse_row(), 927 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 928 
"SAMPLE": lambda self: self.expression( 929 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 930 ), 931 "SECURE": lambda self: self.expression(exp.SecureProperty), 932 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 933 "SETTINGS": lambda self: self.expression( 934 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 935 ), 936 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 937 "SORTKEY": lambda self: self._parse_sortkey(), 938 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 939 "STABLE": lambda self: self.expression( 940 exp.StabilityProperty, this=exp.Literal.string("STABLE") 941 ), 942 "STORED": lambda self: self._parse_stored(), 943 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 944 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 945 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 946 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 947 "TO": lambda self: self._parse_to_table(), 948 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 949 "TRANSFORM": lambda self: self.expression( 950 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 951 ), 952 "TTL": lambda self: self._parse_ttl(), 953 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 954 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 955 "VOLATILE": lambda self: self._parse_volatile_property(), 956 "WITH": lambda self: self._parse_with_property(), 957 } 958 959 CONSTRAINT_PARSERS = { 960 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 961 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 962 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 963 "CHARACTER SET": lambda self: self.expression( 964 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 965 ), 966 "CHECK": lambda self: self.expression( 967 exp.CheckColumnConstraint, 968 this=self._parse_wrapped(self._parse_assignment), 969 enforced=self._match_text_seq("ENFORCED"), 970 ), 971 "COLLATE": lambda self: self.expression( 972 exp.CollateColumnConstraint, 973 this=self._parse_identifier() or self._parse_column(), 974 ), 975 "COMMENT": lambda self: self.expression( 976 exp.CommentColumnConstraint, this=self._parse_string() 977 ), 978 "COMPRESS": lambda self: self._parse_compress(), 979 "CLUSTERED": lambda self: self.expression( 980 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 981 ), 982 "NONCLUSTERED": lambda self: self.expression( 983 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 984 ), 985 "DEFAULT": lambda self: self.expression( 986 exp.DefaultColumnConstraint, this=self._parse_bitwise() 987 ), 988 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 989 "EPHEMERAL": lambda self: self.expression( 990 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 991 ), 992 "EXCLUDE": lambda self: self.expression( 993 exp.ExcludeColumnConstraint, this=self._parse_index_params() 994 ), 995 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 996 "FORMAT": lambda self: self.expression( 997 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 998 ), 999 "GENERATED": lambda self: self._parse_generated_as_identity(), 1000 "IDENTITY": lambda self: self._parse_auto_increment(), 1001 "INLINE": lambda self: 

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
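
    # Usage sketch (illustrative): NO_PAREN_FUNCTION_PARSERS handles keyword-style
    # constructs that take no parenthesized argument list:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("CASE WHEN a THEN b END")).__name__
    #     'Case'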

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")
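
    # Usage sketch (illustrative): FUNCTION_PARSERS special-cases functions whose
    # argument syntax is not a plain comma-separated list, such as CAST:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CAST(x AS INT)").sql()
    #     'CAST(x AS INT)'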
"GLOBAL", 1165 "KEEP", 1166 "NOKEEP", 1167 "ORDER", 1168 "NOORDER", 1169 "NOCACHE", 1170 "CYCLE", 1171 "NOCYCLE", 1172 "NOMINVALUE", 1173 "NOMAXVALUE", 1174 "NOSCALE", 1175 "NOSHARD", 1176 ), 1177 tuple(), 1178 ), 1179 } 1180 1181 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1182 1183 USABLES: OPTIONS_TYPE = dict.fromkeys( 1184 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1185 ) 1186 1187 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1188 1189 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1190 "TYPE": ("EVOLUTION",), 1191 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1192 } 1193 1194 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1195 "NOT": ("ENFORCED",), 1196 "MATCH": ( 1197 "FULL", 1198 "PARTIAL", 1199 "SIMPLE", 1200 ), 1201 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1202 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1203 } 1204 1205 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1206 1207 CLONE_KEYWORDS = {"CLONE", "COPY"} 1208 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1209 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1210 1211 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1212 1213 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1214 1215 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1216 1217 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1218 1219 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1220 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1221 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1222 1223 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1224 1225 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1226 1227 ADD_CONSTRAINT_TOKENS = { 1228 TokenType.CONSTRAINT, 1229 TokenType.FOREIGN_KEY, 1230 TokenType.INDEX, 1231 TokenType.KEY, 1232 TokenType.PRIMARY_KEY, 1233 TokenType.UNIQUE, 1234 } 1235 1236 DISTINCT_TOKENS = {TokenType.DISTINCT} 1237 1238 NULL_TOKENS = {TokenType.NULL} 1239 1240 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1241 1242 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1243 1244 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1245 1246 STRICT_CAST = True 1247 1248 PREFIXED_PIVOT_COLUMNS = False 1249 IDENTIFY_PIVOT_STRINGS = False 1250 1251 LOG_DEFAULTS_TO_LN = False 1252 1253 # Whether ADD is present for each column added by ALTER TABLE 1254 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1255 1256 # Whether the table sample clause expects CSV syntax 1257 TABLESAMPLE_CSV = False 1258 1259 # The default method used for table sampling 1260 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1261 1262 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1263 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1264 1265 # Whether the TRIM function expects the characters to trim as its first argument 1266 TRIM_PATTERN_FIRST = False 1267 1268 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1269 STRING_ALIASES = False 1270 1271 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1272 MODIFIERS_ATTACHED_TO_SET_OP = True 1273 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1274 1275 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1276 NO_PAREN_IF_COMMANDS = True 1277 1278 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1279 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1280 1281 # Whether the `:` operator is used to extract a value from a VARIANT column 1282 COLON_IS_VARIANT_EXTRACT = False 1283 1284 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1285 # If this is True and '(' is not found, the keyword will be treated as an identifier 1286 VALUES_FOLLOWED_BY_PAREN = True 1287 1288 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1289 SUPPORTS_IMPLICIT_UNNEST = False 1290 1291 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1292 INTERVAL_SPANS = True 1293 1294 # Whether a PARTITION clause can follow a table reference 1295 SUPPORTS_PARTITION_SELECTION = False 1296 1297 __slots__ = ( 1298 "error_level", 1299 "error_message_context", 1300 "max_errors", 1301 "dialect", 1302 "sql", 1303 "errors", 1304 "_tokens", 1305 "_index", 1306 "_curr", 1307 "_next", 1308 "_prev", 1309 "_prev_comments", 1310 ) 1311 1312 # Autofilled 1313 SHOW_TRIE: t.Dict = {} 1314 SET_TRIE: t.Dict = {} 1315 1316 def __init__( 1317 self, 1318 error_level: t.Optional[ErrorLevel] = None, 1319 error_message_context: int = 100, 1320 max_errors: int = 3, 1321 dialect: DialectType = None, 1322 ): 1323 from sqlglot.dialects import Dialect 1324 1325 self.error_level = error_level or ErrorLevel.IMMEDIATE 1326 self.error_message_context = error_message_context 1327 self.max_errors = max_errors 1328 self.dialect = Dialect.get_or_raise(dialect) 1329 self.reset() 1330 1331 def reset(self): 1332 self.sql = "" 1333 self.errors = [] 1334 self._tokens = [] 1335 self._index = 0 1336 self._curr = None 1337 self._next = None 1338 self._prev = None 1339 self._prev_comments = None 1340 1341 def parse( 1342 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1343 ) -> t.List[t.Optional[exp.Expression]]: 1344 """ 1345 Parses a list of tokens and returns a list of syntax trees, one tree 1346 per parsed SQL statement. 1347 1348 Args: 1349 raw_tokens: The list of tokens. 1350 sql: The original SQL string, used to produce helpful debug messages. 1351 1352 Returns: 1353 The list of the produced syntax trees. 1354 """ 1355 return self._parse( 1356 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1357 ) 1358 1359 def parse_into( 1360 self, 1361 expression_types: exp.IntoType, 1362 raw_tokens: t.List[Token], 1363 sql: t.Optional[str] = None, 1364 ) -> t.List[t.Optional[exp.Expression]]: 1365 """ 1366 Parses a list of tokens into a given Expression type. If a collection of Expression 1367 types is given instead, this method will try to parse the token list into each one 1368 of them, stopping at the first for which the parsing succeeds. 1369 1370 Args: 1371 expression_types: The expression type(s) to try and parse the token list into. 

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
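
    # Usage sketch (illustrative): driving the parser by hand; `_parse` splits the
    # token stream into one chunk per semicolon-terminated statement:
    #
    #     >>> sql = "SELECT 1; SELECT 2"
    #     >>> tokens = Tokenizer().tokenize(sql)
    #     >>> [e.sql() for e in Parser().parse(tokens, sql)]
    #     ['SELECT 1', 'SELECT 2']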
1485 """ 1486 instance = exp_class(**kwargs) 1487 instance.add_comments(comments) if comments else self._add_comments(instance) 1488 return self.validate_expression(instance) 1489 1490 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1491 if expression and self._prev_comments: 1492 expression.add_comments(self._prev_comments) 1493 self._prev_comments = None 1494 1495 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1496 """ 1497 Validates an Expression, making sure that all its mandatory arguments are set. 1498 1499 Args: 1500 expression: The expression to validate. 1501 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1502 1503 Returns: 1504 The validated expression. 1505 """ 1506 if self.error_level != ErrorLevel.IGNORE: 1507 for error_message in expression.error_messages(args): 1508 self.raise_error(error_message) 1509 1510 return expression 1511 1512 def _find_sql(self, start: Token, end: Token) -> str: 1513 return self.sql[start.start : end.end + 1] 1514 1515 def _is_connected(self) -> bool: 1516 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1517 1518 def _advance(self, times: int = 1) -> None: 1519 self._index += times 1520 self._curr = seq_get(self._tokens, self._index) 1521 self._next = seq_get(self._tokens, self._index + 1) 1522 1523 if self._index > 0: 1524 self._prev = self._tokens[self._index - 1] 1525 self._prev_comments = self._prev.comments 1526 else: 1527 self._prev = None 1528 self._prev_comments = None 1529 1530 def _retreat(self, index: int) -> None: 1531 if index != self._index: 1532 self._advance(index - self._index) 1533 1534 def _warn_unsupported(self) -> None: 1535 if len(self._tokens) <= 1: 1536 return 1537 1538 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1539 # interested in emitting a warning for the one being currently processed. 1540 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1541 1542 logger.warning( 1543 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1544 ) 1545 1546 def _parse_command(self) -> exp.Command: 1547 self._warn_unsupported() 1548 return self.expression( 1549 exp.Command, 1550 comments=self._prev_comments, 1551 this=self._prev.text.upper(), 1552 expression=self._parse_string(), 1553 ) 1554 1555 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1556 """ 1557 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
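
    # Usage sketch (illustrative): `_parse_statement` dispatches on STATEMENT_PARSERS,
    # e.g. COMMENT statements are routed to `_parse_comment`:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("COMMENT ON TABLE t IS 'audit'")).__name__
    #     'Comment'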
self._prev 1667 temporary = self._match(TokenType.TEMPORARY) 1668 materialized = self._match_text_seq("MATERIALIZED") 1669 1670 kind = self._match_set(self.CREATABLES) and self._prev.text 1671 if not kind: 1672 return self._parse_as_command(start) 1673 1674 if_exists = exists or self._parse_exists() 1675 table = self._parse_table_parts( 1676 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1677 ) 1678 1679 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1680 1681 if self._match(TokenType.L_PAREN, advance=False): 1682 expressions = self._parse_wrapped_csv(self._parse_types) 1683 else: 1684 expressions = None 1685 1686 return self.expression( 1687 exp.Drop, 1688 comments=start.comments, 1689 exists=if_exists, 1690 this=table, 1691 expressions=expressions, 1692 kind=kind.upper(), 1693 temporary=temporary, 1694 materialized=materialized, 1695 cascade=self._match_text_seq("CASCADE"), 1696 constraints=self._match_text_seq("CONSTRAINTS"), 1697 purge=self._match_text_seq("PURGE"), 1698 cluster=cluster, 1699 ) 1700 1701 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1702 return ( 1703 self._match_text_seq("IF") 1704 and (not not_ or self._match(TokenType.NOT)) 1705 and self._match(TokenType.EXISTS) 1706 ) 1707 1708 def _parse_create(self) -> exp.Create | exp.Command: 1709 # Note: this can't be None because we've matched a statement parser 1710 start = self._prev 1711 comments = self._prev_comments 1712 1713 replace = ( 1714 start.token_type == TokenType.REPLACE 1715 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1716 or self._match_pair(TokenType.OR, TokenType.ALTER) 1717 ) 1718 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1719 1720 unique = self._match(TokenType.UNIQUE) 1721 1722 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1723 clustered = True 1724 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1725 "COLUMNSTORE" 1726 ): 1727 clustered = False 1728 else: 1729 clustered = None 1730 1731 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1732 self._advance() 1733 1734 properties = None 1735 create_token = self._match_set(self.CREATABLES) and self._prev 1736 1737 if not create_token: 1738 # exp.Properties.Location.POST_CREATE 1739 properties = self._parse_properties() 1740 create_token = self._match_set(self.CREATABLES) and self._prev 1741 1742 if not properties or not create_token: 1743 return self._parse_as_command(start) 1744 1745 concurrently = self._match_text_seq("CONCURRENTLY") 1746 exists = self._parse_exists(not_=True) 1747 this = None 1748 expression: t.Optional[exp.Expression] = None 1749 indexes = None 1750 no_schema_binding = None 1751 begin = None 1752 end = None 1753 clone = None 1754 1755 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1756 nonlocal properties 1757 if properties and temp_props: 1758 properties.expressions.extend(temp_props.expressions) 1759 elif temp_props: 1760 properties = temp_props 1761 1762 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1763 this = self._parse_user_defined_function(kind=create_token.token_type) 1764 1765 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1766 extend_props(self._parse_properties()) 1767 1768 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1769 extend_props(self._parse_properties()) 1770 1771 if not expression: 1772 if self._match(TokenType.COMMAND): 1773 expression = 
self._parse_as_command(self._prev) 1774 else: 1775 begin = self._match(TokenType.BEGIN) 1776 return_ = self._match_text_seq("RETURN") 1777 1778 if self._match(TokenType.STRING, advance=False): 1779 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1780 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1781 expression = self._parse_string() 1782 extend_props(self._parse_properties()) 1783 else: 1784 expression = self._parse_statement() 1785 1786 end = self._match_text_seq("END") 1787 1788 if return_: 1789 expression = self.expression(exp.Return, this=expression) 1790 elif create_token.token_type == TokenType.INDEX: 1791 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1792 if not self._match(TokenType.ON): 1793 index = self._parse_id_var() 1794 anonymous = False 1795 else: 1796 index = None 1797 anonymous = True 1798 1799 this = self._parse_index(index=index, anonymous=anonymous) 1800 elif create_token.token_type in self.DB_CREATABLES: 1801 table_parts = self._parse_table_parts( 1802 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1803 ) 1804 1805 # exp.Properties.Location.POST_NAME 1806 self._match(TokenType.COMMA) 1807 extend_props(self._parse_properties(before=True)) 1808 1809 this = self._parse_schema(this=table_parts) 1810 1811 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1812 extend_props(self._parse_properties()) 1813 1814 self._match(TokenType.ALIAS) 1815 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1816 # exp.Properties.Location.POST_ALIAS 1817 extend_props(self._parse_properties()) 1818 1819 if create_token.token_type == TokenType.SEQUENCE: 1820 expression = self._parse_types() 1821 extend_props(self._parse_properties()) 1822 else: 1823 expression = self._parse_ddl_select() 1824 1825 if create_token.token_type == TokenType.TABLE: 1826 # exp.Properties.Location.POST_EXPRESSION 1827 extend_props(self._parse_properties()) 1828 1829 indexes = [] 1830 while True: 1831 index = self._parse_index() 1832 1833 # exp.Properties.Location.POST_INDEX 1834 extend_props(self._parse_properties()) 1835 if not index: 1836 break 1837 else: 1838 self._match(TokenType.COMMA) 1839 indexes.append(index) 1840 elif create_token.token_type == TokenType.VIEW: 1841 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1842 no_schema_binding = True 1843 1844 shallow = self._match_text_seq("SHALLOW") 1845 1846 if self._match_texts(self.CLONE_KEYWORDS): 1847 copy = self._prev.text.lower() == "copy" 1848 clone = self.expression( 1849 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1850 ) 1851 1852 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1853 return self._parse_as_command(start) 1854 1855 return self.expression( 1856 exp.Create, 1857 comments=comments, 1858 this=this, 1859 kind=create_token.text.upper(), 1860 replace=replace, 1861 refresh=refresh, 1862 unique=unique, 1863 expression=expression, 1864 exists=exists, 1865 properties=properties, 1866 indexes=indexes, 1867 no_schema_binding=no_schema_binding, 1868 begin=begin, 1869 end=end, 1870 clone=clone, 1871 concurrently=concurrently, 1872 clustered=clustered, 1873 ) 1874 1875 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1876 seq = exp.SequenceProperties() 1877 1878 options = [] 1879 index = self._index 1880 1881 while self._curr: 1882 self._match(TokenType.COMMA) 1883 if 
self._match_text_seq("INCREMENT"): 1884 self._match_text_seq("BY") 1885 self._match_text_seq("=") 1886 seq.set("increment", self._parse_term()) 1887 elif self._match_text_seq("MINVALUE"): 1888 seq.set("minvalue", self._parse_term()) 1889 elif self._match_text_seq("MAXVALUE"): 1890 seq.set("maxvalue", self._parse_term()) 1891 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1892 self._match_text_seq("=") 1893 seq.set("start", self._parse_term()) 1894 elif self._match_text_seq("CACHE"): 1895 # T-SQL allows empty CACHE which is initialized dynamically 1896 seq.set("cache", self._parse_number() or True) 1897 elif self._match_text_seq("OWNED", "BY"): 1898 # "OWNED BY NONE" is the default 1899 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1900 else: 1901 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1902 if opt: 1903 options.append(opt) 1904 else: 1905 break 1906 1907 seq.set("options", options if options else None) 1908 return None if self._index == index else seq 1909 1910 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1911 # only used for teradata currently 1912 self._match(TokenType.COMMA) 1913 1914 kwargs = { 1915 "no": self._match_text_seq("NO"), 1916 "dual": self._match_text_seq("DUAL"), 1917 "before": self._match_text_seq("BEFORE"), 1918 "default": self._match_text_seq("DEFAULT"), 1919 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1920 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1921 "after": self._match_text_seq("AFTER"), 1922 "minimum": self._match_texts(("MIN", "MINIMUM")), 1923 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1924 } 1925 1926 if self._match_texts(self.PROPERTY_PARSERS): 1927 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1928 try: 1929 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1930 except TypeError: 1931 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1932 1933 return None 1934 1935 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1936 return self._parse_wrapped_csv(self._parse_property) 1937 1938 def _parse_property(self) -> t.Optional[exp.Expression]: 1939 if self._match_texts(self.PROPERTY_PARSERS): 1940 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1941 1942 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1943 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1944 1945 if self._match_text_seq("COMPOUND", "SORTKEY"): 1946 return self._parse_sortkey(compound=True) 1947 1948 if self._match_text_seq("SQL", "SECURITY"): 1949 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1950 1951 index = self._index 1952 key = self._parse_column() 1953 1954 if not self._match(TokenType.EQ): 1955 self._retreat(index) 1956 return self._parse_sequence_properties() 1957 1958 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1959 if isinstance(key, exp.Column): 1960 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1961 1962 value = self._parse_bitwise() or self._parse_var(any_token=True) 1963 1964 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1965 if isinstance(value, exp.Column): 1966 value = exp.var(value.name) 1967 1968 return self.expression(exp.Property, this=key, value=value) 1969 1970 def _parse_stored(self) -> exp.FileFormatProperty: 1971 self._match(TokenType.ALIAS) 1972 1973 
input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1974 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1975 1976 return self.expression( 1977 exp.FileFormatProperty, 1978 this=( 1979 self.expression( 1980 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1981 ) 1982 if input_format or output_format 1983 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1984 ), 1985 ) 1986 1987 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1988 field = self._parse_field() 1989 if isinstance(field, exp.Identifier) and not field.quoted: 1990 field = exp.var(field) 1991 1992 return field 1993 1994 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1995 self._match(TokenType.EQ) 1996 self._match(TokenType.ALIAS) 1997 1998 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1999 2000 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2001 properties = [] 2002 while True: 2003 if before: 2004 prop = self._parse_property_before() 2005 else: 2006 prop = self._parse_property() 2007 if not prop: 2008 break 2009 for p in ensure_list(prop): 2010 properties.append(p) 2011 2012 if properties: 2013 return self.expression(exp.Properties, expressions=properties) 2014 2015 return None 2016 2017 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2018 return self.expression( 2019 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2020 ) 2021 2022 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2023 if self._index >= 2: 2024 pre_volatile_token = self._tokens[self._index - 2] 2025 else: 2026 pre_volatile_token = None 2027 2028 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2029 return exp.VolatileProperty() 2030 2031 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2032 2033 def _parse_retention_period(self) -> exp.Var: 2034 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2035 number = self._parse_number() 2036 number_str = f"{number} " if number else "" 2037 unit = self._parse_var(any_token=True) 2038 return exp.var(f"{number_str}{unit}") 2039 2040 def _parse_system_versioning_property( 2041 self, with_: bool = False 2042 ) -> exp.WithSystemVersioningProperty: 2043 self._match(TokenType.EQ) 2044 prop = self.expression( 2045 exp.WithSystemVersioningProperty, 2046 **{ # type: ignore 2047 "on": True, 2048 "with": with_, 2049 }, 2050 ) 2051 2052 if self._match_text_seq("OFF"): 2053 prop.set("on", False) 2054 return prop 2055 2056 self._match(TokenType.ON) 2057 if self._match(TokenType.L_PAREN): 2058 while self._curr and not self._match(TokenType.R_PAREN): 2059 if self._match_text_seq("HISTORY_TABLE", "="): 2060 prop.set("this", self._parse_table_parts()) 2061 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2062 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2063 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2064 prop.set("retention_period", self._parse_retention_period()) 2065 2066 self._match(TokenType.COMMA) 2067 2068 return prop 2069 2070 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2071 self._match(TokenType.EQ) 2072 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2073 prop = 
self.expression(exp.DataDeletionProperty, on=on) 2074 2075 if self._match(TokenType.L_PAREN): 2076 while self._curr and not self._match(TokenType.R_PAREN): 2077 if self._match_text_seq("FILTER_COLUMN", "="): 2078 prop.set("filter_column", self._parse_column()) 2079 elif self._match_text_seq("RETENTION_PERIOD", "="): 2080 prop.set("retention_period", self._parse_retention_period()) 2081 2082 self._match(TokenType.COMMA) 2083 2084 return prop 2085 2086 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2087 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2088 prop = self._parse_system_versioning_property(with_=True) 2089 self._match_r_paren() 2090 return prop 2091 2092 if self._match(TokenType.L_PAREN, advance=False): 2093 return self._parse_wrapped_properties() 2094 2095 if self._match_text_seq("JOURNAL"): 2096 return self._parse_withjournaltable() 2097 2098 if self._match_texts(self.VIEW_ATTRIBUTES): 2099 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2100 2101 if self._match_text_seq("DATA"): 2102 return self._parse_withdata(no=False) 2103 elif self._match_text_seq("NO", "DATA"): 2104 return self._parse_withdata(no=True) 2105 2106 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2107 return self._parse_serde_properties(with_=True) 2108 2109 if self._match(TokenType.SCHEMA): 2110 return self.expression( 2111 exp.WithSchemaBindingProperty, 2112 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2113 ) 2114 2115 if not self._next: 2116 return None 2117 2118 return self._parse_withisolatedloading() 2119 2120 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2121 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2122 self._match(TokenType.EQ) 2123 2124 user = self._parse_id_var() 2125 self._match(TokenType.PARAMETER) 2126 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2127 2128 if not user or not host: 2129 return None 2130 2131 return exp.DefinerProperty(this=f"{user}@{host}") 2132 2133 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2134 self._match(TokenType.TABLE) 2135 self._match(TokenType.EQ) 2136 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2137 2138 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2139 return self.expression(exp.LogProperty, no=no) 2140 2141 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2142 return self.expression(exp.JournalProperty, **kwargs) 2143 2144 def _parse_checksum(self) -> exp.ChecksumProperty: 2145 self._match(TokenType.EQ) 2146 2147 on = None 2148 if self._match(TokenType.ON): 2149 on = True 2150 elif self._match_text_seq("OFF"): 2151 on = False 2152 2153 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2154 2155 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2156 return self.expression( 2157 exp.Cluster, 2158 expressions=( 2159 self._parse_wrapped_csv(self._parse_ordered) 2160 if wrapped 2161 else self._parse_csv(self._parse_ordered) 2162 ), 2163 ) 2164 2165 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2166 self._match_text_seq("BY") 2167 2168 self._match_l_paren() 2169 expressions = self._parse_csv(self._parse_column) 2170 self._match_r_paren() 2171 2172 if self._match_text_seq("SORTED", "BY"): 2173 self._match_l_paren() 2174 sorted_by = self._parse_csv(self._parse_ordered) 2175 self._match_r_paren() 2176 else: 2177 sorted_by = None 2178 2179 
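# NOTE: illustrative example, not part of this module. A Hive-style input for
# this property parser; the INTO <n> BUCKETS tail is consumed just below:
#
#   CREATE TABLE t (x INT, y INT)
#   CLUSTERED BY (x) SORTED BY (y) INTO 4 BUCKETS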
self._match(TokenType.INTO) 2180 buckets = self._parse_number() 2181 self._match_text_seq("BUCKETS") 2182 2183 return self.expression( 2184 exp.ClusteredByProperty, 2185 expressions=expressions, 2186 sorted_by=sorted_by, 2187 buckets=buckets, 2188 ) 2189 2190 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2191 if not self._match_text_seq("GRANTS"): 2192 self._retreat(self._index - 1) 2193 return None 2194 2195 return self.expression(exp.CopyGrantsProperty) 2196 2197 def _parse_freespace(self) -> exp.FreespaceProperty: 2198 self._match(TokenType.EQ) 2199 return self.expression( 2200 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2201 ) 2202 2203 def _parse_mergeblockratio( 2204 self, no: bool = False, default: bool = False 2205 ) -> exp.MergeBlockRatioProperty: 2206 if self._match(TokenType.EQ): 2207 return self.expression( 2208 exp.MergeBlockRatioProperty, 2209 this=self._parse_number(), 2210 percent=self._match(TokenType.PERCENT), 2211 ) 2212 2213 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2214 2215 def _parse_datablocksize( 2216 self, 2217 default: t.Optional[bool] = None, 2218 minimum: t.Optional[bool] = None, 2219 maximum: t.Optional[bool] = None, 2220 ) -> exp.DataBlocksizeProperty: 2221 self._match(TokenType.EQ) 2222 size = self._parse_number() 2223 2224 units = None 2225 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2226 units = self._prev.text 2227 2228 return self.expression( 2229 exp.DataBlocksizeProperty, 2230 size=size, 2231 units=units, 2232 default=default, 2233 minimum=minimum, 2234 maximum=maximum, 2235 ) 2236 2237 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2238 self._match(TokenType.EQ) 2239 always = self._match_text_seq("ALWAYS") 2240 manual = self._match_text_seq("MANUAL") 2241 never = self._match_text_seq("NEVER") 2242 default = self._match_text_seq("DEFAULT") 2243 2244 autotemp = None 2245 if self._match_text_seq("AUTOTEMP"): 2246 autotemp = self._parse_schema() 2247 2248 return self.expression( 2249 exp.BlockCompressionProperty, 2250 always=always, 2251 manual=manual, 2252 never=never, 2253 default=default, 2254 autotemp=autotemp, 2255 ) 2256 2257 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2258 index = self._index 2259 no = self._match_text_seq("NO") 2260 concurrent = self._match_text_seq("CONCURRENT") 2261 2262 if not self._match_text_seq("ISOLATED", "LOADING"): 2263 self._retreat(index) 2264 return None 2265 2266 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2267 return self.expression( 2268 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2269 ) 2270 2271 def _parse_locking(self) -> exp.LockingProperty: 2272 if self._match(TokenType.TABLE): 2273 kind = "TABLE" 2274 elif self._match(TokenType.VIEW): 2275 kind = "VIEW" 2276 elif self._match(TokenType.ROW): 2277 kind = "ROW" 2278 elif self._match_text_seq("DATABASE"): 2279 kind = "DATABASE" 2280 else: 2281 kind = None 2282 2283 if kind in ("DATABASE", "TABLE", "VIEW"): 2284 this = self._parse_table_parts() 2285 else: 2286 this = None 2287 2288 if self._match(TokenType.FOR): 2289 for_or_in = "FOR" 2290 elif self._match(TokenType.IN): 2291 for_or_in = "IN" 2292 else: 2293 for_or_in = None 2294 2295 if self._match_text_seq("ACCESS"): 2296 lock_type = "ACCESS" 2297 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2298 lock_type = "EXCLUSIVE" 2299 elif self._match_text_seq("SHARE"): 2300 
lock_type = "SHARE" 2301 elif self._match_text_seq("READ"): 2302 lock_type = "READ" 2303 elif self._match_text_seq("WRITE"): 2304 lock_type = "WRITE" 2305 elif self._match_text_seq("CHECKSUM"): 2306 lock_type = "CHECKSUM" 2307 else: 2308 lock_type = None 2309 2310 override = self._match_text_seq("OVERRIDE") 2311 2312 return self.expression( 2313 exp.LockingProperty, 2314 this=this, 2315 kind=kind, 2316 for_or_in=for_or_in, 2317 lock_type=lock_type, 2318 override=override, 2319 ) 2320 2321 def _parse_partition_by(self) -> t.List[exp.Expression]: 2322 if self._match(TokenType.PARTITION_BY): 2323 return self._parse_csv(self._parse_assignment) 2324 return [] 2325 2326 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2327 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2328 if self._match_text_seq("MINVALUE"): 2329 return exp.var("MINVALUE") 2330 if self._match_text_seq("MAXVALUE"): 2331 return exp.var("MAXVALUE") 2332 return self._parse_bitwise() 2333 2334 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2335 expression = None 2336 from_expressions = None 2337 to_expressions = None 2338 2339 if self._match(TokenType.IN): 2340 this = self._parse_wrapped_csv(self._parse_bitwise) 2341 elif self._match(TokenType.FROM): 2342 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2343 self._match_text_seq("TO") 2344 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2345 elif self._match_text_seq("WITH", "(", "MODULUS"): 2346 this = self._parse_number() 2347 self._match_text_seq(",", "REMAINDER") 2348 expression = self._parse_number() 2349 self._match_r_paren() 2350 else: 2351 self.raise_error("Failed to parse partition bound spec.") 2352 2353 return self.expression( 2354 exp.PartitionBoundSpec, 2355 this=this, 2356 expression=expression, 2357 from_expressions=from_expressions, 2358 to_expressions=to_expressions, 2359 ) 2360 2361 # https://www.postgresql.org/docs/current/sql-createtable.html 2362 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2363 if not self._match_text_seq("OF"): 2364 self._retreat(self._index - 1) 2365 return None 2366 2367 this = self._parse_table(schema=True) 2368 2369 if self._match(TokenType.DEFAULT): 2370 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2371 elif self._match_text_seq("FOR", "VALUES"): 2372 expression = self._parse_partition_bound_spec() 2373 else: 2374 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2375 2376 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2377 2378 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2379 self._match(TokenType.EQ) 2380 return self.expression( 2381 exp.PartitionedByProperty, 2382 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2383 ) 2384 2385 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2386 if self._match_text_seq("AND", "STATISTICS"): 2387 statistics = True 2388 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2389 statistics = False 2390 else: 2391 statistics = None 2392 2393 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2394 2395 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2396 if self._match_text_seq("SQL"): 2397 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2398 return None 2399 2400 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2401 if self._match_text_seq("SQL", 
"DATA"): 2402 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2403 return None 2404 2405 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2406 if self._match_text_seq("PRIMARY", "INDEX"): 2407 return exp.NoPrimaryIndexProperty() 2408 if self._match_text_seq("SQL"): 2409 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2410 return None 2411 2412 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2413 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2414 return exp.OnCommitProperty() 2415 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2416 return exp.OnCommitProperty(delete=True) 2417 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2418 2419 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2420 if self._match_text_seq("SQL", "DATA"): 2421 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2422 return None 2423 2424 def _parse_distkey(self) -> exp.DistKeyProperty: 2425 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2426 2427 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2428 table = self._parse_table(schema=True) 2429 2430 options = [] 2431 while self._match_texts(("INCLUDING", "EXCLUDING")): 2432 this = self._prev.text.upper() 2433 2434 id_var = self._parse_id_var() 2435 if not id_var: 2436 return None 2437 2438 options.append( 2439 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2440 ) 2441 2442 return self.expression(exp.LikeProperty, this=table, expressions=options) 2443 2444 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2445 return self.expression( 2446 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2447 ) 2448 2449 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2450 self._match(TokenType.EQ) 2451 return self.expression( 2452 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2453 ) 2454 2455 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2456 self._match_text_seq("WITH", "CONNECTION") 2457 return self.expression( 2458 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2459 ) 2460 2461 def _parse_returns(self) -> exp.ReturnsProperty: 2462 value: t.Optional[exp.Expression] 2463 null = None 2464 is_table = self._match(TokenType.TABLE) 2465 2466 if is_table: 2467 if self._match(TokenType.LT): 2468 value = self.expression( 2469 exp.Schema, 2470 this="TABLE", 2471 expressions=self._parse_csv(self._parse_struct_types), 2472 ) 2473 if not self._match(TokenType.GT): 2474 self.raise_error("Expecting >") 2475 else: 2476 value = self._parse_schema(exp.var("TABLE")) 2477 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2478 null = True 2479 value = None 2480 else: 2481 value = self._parse_types() 2482 2483 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2484 2485 def _parse_describe(self) -> exp.Describe: 2486 kind = self._match_set(self.CREATABLES) and self._prev.text 2487 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2488 if self._match(TokenType.DOT): 2489 style = None 2490 self._retreat(self._index - 2) 2491 this = self._parse_table(schema=True) 2492 properties = self._parse_properties() 2493 expressions = properties.expressions if properties else None 2494 return self.expression( 2495 
exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2496 ) 2497 2498 def _parse_insert(self) -> exp.Insert: 2499 comments = ensure_list(self._prev_comments) 2500 hint = self._parse_hint() 2501 overwrite = self._match(TokenType.OVERWRITE) 2502 ignore = self._match(TokenType.IGNORE) 2503 local = self._match_text_seq("LOCAL") 2504 alternative = None 2505 is_function = None 2506 2507 if self._match_text_seq("DIRECTORY"): 2508 this: t.Optional[exp.Expression] = self.expression( 2509 exp.Directory, 2510 this=self._parse_var_or_string(), 2511 local=local, 2512 row_format=self._parse_row_format(match_row=True), 2513 ) 2514 else: 2515 if self._match(TokenType.OR): 2516 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2517 2518 self._match(TokenType.INTO) 2519 comments += ensure_list(self._prev_comments) 2520 self._match(TokenType.TABLE) 2521 is_function = self._match(TokenType.FUNCTION) 2522 2523 this = ( 2524 self._parse_table(schema=True, parse_partition=True) 2525 if not is_function 2526 else self._parse_function() 2527 ) 2528 2529 returning = self._parse_returning() 2530 2531 return self.expression( 2532 exp.Insert, 2533 comments=comments, 2534 hint=hint, 2535 is_function=is_function, 2536 this=this, 2537 stored=self._match_text_seq("STORED") and self._parse_stored(), 2538 by_name=self._match_text_seq("BY", "NAME"), 2539 exists=self._parse_exists(), 2540 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2541 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2542 conflict=self._parse_on_conflict(), 2543 returning=returning or self._parse_returning(), 2544 overwrite=overwrite, 2545 alternative=alternative, 2546 ignore=ignore, 2547 ) 2548 2549 def _parse_kill(self) -> exp.Kill: 2550 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2551 2552 return self.expression( 2553 exp.Kill, 2554 this=self._parse_primary(), 2555 kind=kind, 2556 ) 2557 2558 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2559 conflict = self._match_text_seq("ON", "CONFLICT") 2560 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2561 2562 if not conflict and not duplicate: 2563 return None 2564 2565 conflict_keys = None 2566 constraint = None 2567 2568 if conflict: 2569 if self._match_text_seq("ON", "CONSTRAINT"): 2570 constraint = self._parse_id_var() 2571 elif self._match(TokenType.L_PAREN): 2572 conflict_keys = self._parse_csv(self._parse_id_var) 2573 self._match_r_paren() 2574 2575 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2576 if self._prev.token_type == TokenType.UPDATE: 2577 self._match(TokenType.SET) 2578 expressions = self._parse_csv(self._parse_equality) 2579 else: 2580 expressions = None 2581 2582 return self.expression( 2583 exp.OnConflict, 2584 duplicate=duplicate, 2585 expressions=expressions, 2586 action=action, 2587 conflict_keys=conflict_keys, 2588 constraint=constraint, 2589 ) 2590 2591 def _parse_returning(self) -> t.Optional[exp.Returning]: 2592 if not self._match(TokenType.RETURNING): 2593 return None 2594 return self.expression( 2595 exp.Returning, 2596 expressions=self._parse_csv(self._parse_expression), 2597 into=self._match(TokenType.INTO) and self._parse_table_part(), 2598 ) 2599 2600 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2601 if not self._match(TokenType.FORMAT): 2602 return None 2603 return self._parse_row_format() 2604 2605 def 
_parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2606 index = self._index 2607 with_ = with_ or self._match_text_seq("WITH") 2608 2609 if not self._match(TokenType.SERDE_PROPERTIES): 2610 self._retreat(index) 2611 return None 2612 return self.expression( 2613 exp.SerdeProperties, 2614 **{ # type: ignore 2615 "expressions": self._parse_wrapped_properties(), 2616 "with": with_, 2617 }, 2618 ) 2619 2620 def _parse_row_format( 2621 self, match_row: bool = False 2622 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2623 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2624 return None 2625 2626 if self._match_text_seq("SERDE"): 2627 this = self._parse_string() 2628 2629 serde_properties = self._parse_serde_properties() 2630 2631 return self.expression( 2632 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2633 ) 2634 2635 self._match_text_seq("DELIMITED") 2636 2637 kwargs = {} 2638 2639 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2640 kwargs["fields"] = self._parse_string() 2641 if self._match_text_seq("ESCAPED", "BY"): 2642 kwargs["escaped"] = self._parse_string() 2643 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2644 kwargs["collection_items"] = self._parse_string() 2645 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2646 kwargs["map_keys"] = self._parse_string() 2647 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2648 kwargs["lines"] = self._parse_string() 2649 if self._match_text_seq("NULL", "DEFINED", "AS"): 2650 kwargs["null"] = self._parse_string() 2651 2652 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2653 2654 def _parse_load(self) -> exp.LoadData | exp.Command: 2655 if self._match_text_seq("DATA"): 2656 local = self._match_text_seq("LOCAL") 2657 self._match_text_seq("INPATH") 2658 inpath = self._parse_string() 2659 overwrite = self._match(TokenType.OVERWRITE) 2660 self._match_pair(TokenType.INTO, TokenType.TABLE) 2661 2662 return self.expression( 2663 exp.LoadData, 2664 this=self._parse_table(schema=True), 2665 local=local, 2666 overwrite=overwrite, 2667 inpath=inpath, 2668 partition=self._parse_partition(), 2669 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2670 serde=self._match_text_seq("SERDE") and self._parse_string(), 2671 ) 2672 return self._parse_as_command(self._prev) 2673 2674 def _parse_delete(self) -> exp.Delete: 2675 # This handles MySQL's "Multiple-Table Syntax" 2676 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2677 tables = None 2678 comments = self._prev_comments 2679 if not self._match(TokenType.FROM, advance=False): 2680 tables = self._parse_csv(self._parse_table) or None 2681 2682 returning = self._parse_returning() 2683 2684 return self.expression( 2685 exp.Delete, 2686 comments=comments, 2687 tables=tables, 2688 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2689 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2690 where=self._parse_where(), 2691 returning=returning or self._parse_returning(), 2692 limit=self._parse_limit(), 2693 ) 2694 2695 def _parse_update(self) -> exp.Update: 2696 comments = self._prev_comments 2697 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2698 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2699 returning = self._parse_returning() 2700 return self.expression( 2701 exp.Update, 2702 
comments=comments, 2703 **{ # type: ignore 2704 "this": this, 2705 "expressions": expressions, 2706 "from": self._parse_from(joins=True), 2707 "where": self._parse_where(), 2708 "returning": returning or self._parse_returning(), 2709 "order": self._parse_order(), 2710 "limit": self._parse_limit(), 2711 }, 2712 ) 2713 2714 def _parse_uncache(self) -> exp.Uncache: 2715 if not self._match(TokenType.TABLE): 2716 self.raise_error("Expecting TABLE after UNCACHE") 2717 2718 return self.expression( 2719 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2720 ) 2721 2722 def _parse_cache(self) -> exp.Cache: 2723 lazy = self._match_text_seq("LAZY") 2724 self._match(TokenType.TABLE) 2725 table = self._parse_table(schema=True) 2726 2727 options = [] 2728 if self._match_text_seq("OPTIONS"): 2729 self._match_l_paren() 2730 k = self._parse_string() 2731 self._match(TokenType.EQ) 2732 v = self._parse_string() 2733 options = [k, v] 2734 self._match_r_paren() 2735 2736 self._match(TokenType.ALIAS) 2737 return self.expression( 2738 exp.Cache, 2739 this=table, 2740 lazy=lazy, 2741 options=options, 2742 expression=self._parse_select(nested=True), 2743 ) 2744 2745 def _parse_partition(self) -> t.Optional[exp.Partition]: 2746 if not self._match(TokenType.PARTITION): 2747 return None 2748 2749 return self.expression( 2750 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2751 ) 2752 2753 def _parse_value(self) -> t.Optional[exp.Tuple]: 2754 if self._match(TokenType.L_PAREN): 2755 expressions = self._parse_csv(self._parse_expression) 2756 self._match_r_paren() 2757 return self.expression(exp.Tuple, expressions=expressions) 2758 2759 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2760 expression = self._parse_expression() 2761 if expression: 2762 return self.expression(exp.Tuple, expressions=[expression]) 2763 return None 2764 2765 def _parse_projections(self) -> t.List[exp.Expression]: 2766 return self._parse_expressions() 2767 2768 def _parse_select( 2769 self, 2770 nested: bool = False, 2771 table: bool = False, 2772 parse_subquery_alias: bool = True, 2773 parse_set_operation: bool = True, 2774 ) -> t.Optional[exp.Expression]: 2775 cte = self._parse_with() 2776 2777 if cte: 2778 this = self._parse_statement() 2779 2780 if not this: 2781 self.raise_error("Failed to parse any statement following CTE") 2782 return cte 2783 2784 if "with" in this.arg_types: 2785 this.set("with", cte) 2786 else: 2787 self.raise_error(f"{this.key} does not support CTE") 2788 this = cte 2789 2790 return this 2791 2792 # duckdb supports leading with FROM x 2793 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2794 2795 if self._match(TokenType.SELECT): 2796 comments = self._prev_comments 2797 2798 hint = self._parse_hint() 2799 2800 if self._next and not self._next.token_type == TokenType.DOT: 2801 all_ = self._match(TokenType.ALL) 2802 distinct = self._match_set(self.DISTINCT_TOKENS) 2803 else: 2804 all_, distinct = None, None 2805 2806 kind = ( 2807 self._match(TokenType.ALIAS) 2808 and self._match_texts(("STRUCT", "VALUE")) 2809 and self._prev.text.upper() 2810 ) 2811 2812 if distinct: 2813 distinct = self.expression( 2814 exp.Distinct, 2815 on=self._parse_value() if self._match(TokenType.ON) else None, 2816 ) 2817 2818 if all_ and distinct: 2819 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2820 2821 limit = self._parse_limit(top=True) 2822 projections = self._parse_projections() 2823 2824 
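# NOTE: descriptive comment, not part of this module. At this point the
# optional leading pieces have been consumed: the hint, ALL/DISTINCT (with an
# optional DISTINCT ON (...) tuple), a STRUCT/VALUE kind (e.g. BigQuery's
# SELECT AS STRUCT), a TOP-style limit, and the projection list. They are
# assembled into a single exp.Select node below.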
this = self.expression( 2825 exp.Select, 2826 kind=kind, 2827 hint=hint, 2828 distinct=distinct, 2829 expressions=projections, 2830 limit=limit, 2831 ) 2832 this.comments = comments 2833 2834 into = self._parse_into() 2835 if into: 2836 this.set("into", into) 2837 2838 if not from_: 2839 from_ = self._parse_from() 2840 2841 if from_: 2842 this.set("from", from_) 2843 2844 this = self._parse_query_modifiers(this) 2845 elif (table or nested) and self._match(TokenType.L_PAREN): 2846 if self._match(TokenType.PIVOT): 2847 this = self._parse_simplified_pivot() 2848 elif self._match(TokenType.FROM): 2849 this = exp.select("*").from_( 2850 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2851 ) 2852 else: 2853 this = ( 2854 self._parse_table() 2855 if table 2856 else self._parse_select(nested=True, parse_set_operation=False) 2857 ) 2858 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2859 2860 self._match_r_paren() 2861 2862 # We return early here so that the UNION isn't attached to the subquery by the 2863 # following call to _parse_set_operations, but instead becomes the parent node 2864 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2865 elif self._match(TokenType.VALUES, advance=False): 2866 this = self._parse_derived_table_values() 2867 elif from_: 2868 this = exp.select("*").from_(from_.this, copy=False) 2869 elif self._match(TokenType.SUMMARIZE): 2870 table = self._match(TokenType.TABLE) 2871 this = self._parse_select() or self._parse_string() or self._parse_table() 2872 return self.expression(exp.Summarize, this=this, table=table) 2873 elif self._match(TokenType.DESCRIBE): 2874 this = self._parse_describe() 2875 elif self._match_text_seq("STREAM"): 2876 this = self.expression(exp.Stream, this=self._parse_function()) 2877 else: 2878 this = None 2879 2880 return self._parse_set_operations(this) if parse_set_operation else this 2881 2882 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2883 if not skip_with_token and not self._match(TokenType.WITH): 2884 return None 2885 2886 comments = self._prev_comments 2887 recursive = self._match(TokenType.RECURSIVE) 2888 2889 expressions = [] 2890 while True: 2891 expressions.append(self._parse_cte()) 2892 2893 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2894 break 2895 else: 2896 self._match(TokenType.WITH) 2897 2898 return self.expression( 2899 exp.With, comments=comments, expressions=expressions, recursive=recursive 2900 ) 2901 2902 def _parse_cte(self) -> exp.CTE: 2903 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2904 if not alias or not alias.this: 2905 self.raise_error("Expected CTE to have alias") 2906 2907 self._match(TokenType.ALIAS) 2908 comments = self._prev_comments 2909 2910 if self._match_text_seq("NOT", "MATERIALIZED"): 2911 materialized = False 2912 elif self._match_text_seq("MATERIALIZED"): 2913 materialized = True 2914 else: 2915 materialized = None 2916 2917 return self.expression( 2918 exp.CTE, 2919 this=self._parse_wrapped(self._parse_statement), 2920 alias=alias, 2921 materialized=materialized, 2922 comments=comments, 2923 ) 2924 2925 def _parse_table_alias( 2926 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2927 ) -> t.Optional[exp.TableAlias]: 2928 any_token = self._match(TokenType.ALIAS) 2929 alias = ( 2930 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2931 or self._parse_string_as_identifier() 2932 ) 2933 2934 index = self._index 2935 if 
self._match(TokenType.L_PAREN): 2936 columns = self._parse_csv(self._parse_function_parameter) 2937 self._match_r_paren() if columns else self._retreat(index) 2938 else: 2939 columns = None 2940 2941 if not alias and not columns: 2942 return None 2943 2944 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2945 2946 # We bubble up comments from the Identifier to the TableAlias 2947 if isinstance(alias, exp.Identifier): 2948 table_alias.add_comments(alias.pop_comments()) 2949 2950 return table_alias 2951 2952 def _parse_subquery( 2953 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2954 ) -> t.Optional[exp.Subquery]: 2955 if not this: 2956 return None 2957 2958 return self.expression( 2959 exp.Subquery, 2960 this=this, 2961 pivots=self._parse_pivots(), 2962 alias=self._parse_table_alias() if parse_alias else None, 2963 ) 2964 2965 def _implicit_unnests_to_explicit(self, this: E) -> E: 2966 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2967 2968 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2969 for i, join in enumerate(this.args.get("joins") or []): 2970 table = join.this 2971 normalized_table = table.copy() 2972 normalized_table.meta["maybe_column"] = True 2973 normalized_table = _norm(normalized_table, dialect=self.dialect) 2974 2975 if isinstance(table, exp.Table) and not join.args.get("on"): 2976 if normalized_table.parts[0].name in refs: 2977 table_as_column = table.to_column() 2978 unnest = exp.Unnest(expressions=[table_as_column]) 2979 2980 # Table.to_column creates a parent Alias node that we want to convert to 2981 # a TableAlias and attach to the Unnest, so it matches the parser's output 2982 if isinstance(table.args.get("alias"), exp.TableAlias): 2983 table_as_column.replace(table_as_column.this) 2984 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2985 2986 table.replace(unnest) 2987 2988 refs.add(normalized_table.alias_or_name) 2989 2990 return this 2991 2992 def _parse_query_modifiers( 2993 self, this: t.Optional[exp.Expression] 2994 ) -> t.Optional[exp.Expression]: 2995 if isinstance(this, (exp.Query, exp.Table)): 2996 for join in self._parse_joins(): 2997 this.append("joins", join) 2998 for lateral in iter(self._parse_lateral, None): 2999 this.append("laterals", lateral) 3000 3001 while True: 3002 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3003 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3004 key, expression = parser(self) 3005 3006 if expression: 3007 this.set(key, expression) 3008 if key == "limit": 3009 offset = expression.args.pop("offset", None) 3010 3011 if offset: 3012 offset = exp.Offset(expression=offset) 3013 this.set("offset", offset) 3014 3015 limit_by_expressions = expression.expressions 3016 expression.set("expressions", None) 3017 offset.set("expressions", limit_by_expressions) 3018 continue 3019 break 3020 3021 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3022 this = self._implicit_unnests_to_explicit(this) 3023 3024 return this 3025 3026 def _parse_hint(self) -> t.Optional[exp.Hint]: 3027 if self._match(TokenType.HINT): 3028 hints = [] 3029 for hint in iter( 3030 lambda: self._parse_csv( 3031 lambda: self._parse_function() or self._parse_var(upper=True) 3032 ), 3033 [], 3034 ): 3035 hints.extend(hint) 3036 3037 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3038 self.raise_error("Expected */ after HINT") 3039 3040 return self.expression(exp.Hint, 
expressions=hints) 3041 3042 return None 3043 3044 def _parse_into(self) -> t.Optional[exp.Into]: 3045 if not self._match(TokenType.INTO): 3046 return None 3047 3048 temp = self._match(TokenType.TEMPORARY) 3049 unlogged = self._match_text_seq("UNLOGGED") 3050 self._match(TokenType.TABLE) 3051 3052 return self.expression( 3053 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3054 ) 3055 3056 def _parse_from( 3057 self, joins: bool = False, skip_from_token: bool = False 3058 ) -> t.Optional[exp.From]: 3059 if not skip_from_token and not self._match(TokenType.FROM): 3060 return None 3061 3062 return self.expression( 3063 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3064 ) 3065 3066 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3067 return self.expression( 3068 exp.MatchRecognizeMeasure, 3069 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3070 this=self._parse_expression(), 3071 ) 3072 3073 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3074 if not self._match(TokenType.MATCH_RECOGNIZE): 3075 return None 3076 3077 self._match_l_paren() 3078 3079 partition = self._parse_partition_by() 3080 order = self._parse_order() 3081 3082 measures = ( 3083 self._parse_csv(self._parse_match_recognize_measure) 3084 if self._match_text_seq("MEASURES") 3085 else None 3086 ) 3087 3088 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3089 rows = exp.var("ONE ROW PER MATCH") 3090 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3091 text = "ALL ROWS PER MATCH" 3092 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3093 text += " SHOW EMPTY MATCHES" 3094 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3095 text += " OMIT EMPTY MATCHES" 3096 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3097 text += " WITH UNMATCHED ROWS" 3098 rows = exp.var(text) 3099 else: 3100 rows = None 3101 3102 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3103 text = "AFTER MATCH SKIP" 3104 if self._match_text_seq("PAST", "LAST", "ROW"): 3105 text += " PAST LAST ROW" 3106 elif self._match_text_seq("TO", "NEXT", "ROW"): 3107 text += " TO NEXT ROW" 3108 elif self._match_text_seq("TO", "FIRST"): 3109 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3110 elif self._match_text_seq("TO", "LAST"): 3111 text += f" TO LAST {self._advance_any().text}" # type: ignore 3112 after = exp.var(text) 3113 else: 3114 after = None 3115 3116 if self._match_text_seq("PATTERN"): 3117 self._match_l_paren() 3118 3119 if not self._curr: 3120 self.raise_error("Expecting )", self._curr) 3121 3122 paren = 1 3123 start = self._curr 3124 3125 while self._curr and paren > 0: 3126 if self._curr.token_type == TokenType.L_PAREN: 3127 paren += 1 3128 if self._curr.token_type == TokenType.R_PAREN: 3129 paren -= 1 3130 3131 end = self._prev 3132 self._advance() 3133 3134 if paren > 0: 3135 self.raise_error("Expecting )", self._curr) 3136 3137 pattern = exp.var(self._find_sql(start, end)) 3138 else: 3139 pattern = None 3140 3141 define = ( 3142 self._parse_csv(self._parse_name_as_expression) 3143 if self._match_text_seq("DEFINE") 3144 else None 3145 ) 3146 3147 self._match_r_paren() 3148 3149 return self.expression( 3150 exp.MatchRecognize, 3151 partition_by=partition, 3152 order=order, 3153 measures=measures, 3154 rows=rows, 3155 after=after, 3156 pattern=pattern, 3157 define=define, 3158 alias=self._parse_table_alias(), 3159 ) 3160 3161 def _parse_lateral(self) -> 
t.Optional[exp.Lateral]: 3162 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3163 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3164 cross_apply = False 3165 3166 if cross_apply is not None: 3167 this = self._parse_select(table=True) 3168 view = None 3169 outer = None 3170 elif self._match(TokenType.LATERAL): 3171 this = self._parse_select(table=True) 3172 view = self._match(TokenType.VIEW) 3173 outer = self._match(TokenType.OUTER) 3174 else: 3175 return None 3176 3177 if not this: 3178 this = ( 3179 self._parse_unnest() 3180 or self._parse_function() 3181 or self._parse_id_var(any_token=False) 3182 ) 3183 3184 while self._match(TokenType.DOT): 3185 this = exp.Dot( 3186 this=this, 3187 expression=self._parse_function() or self._parse_id_var(any_token=False), 3188 ) 3189 3190 if view: 3191 table = self._parse_id_var(any_token=False) 3192 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3193 table_alias: t.Optional[exp.TableAlias] = self.expression( 3194 exp.TableAlias, this=table, columns=columns 3195 ) 3196 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3197 # We move the alias from the lateral's child node to the lateral itself 3198 table_alias = this.args["alias"].pop() 3199 else: 3200 table_alias = self._parse_table_alias() 3201 3202 return self.expression( 3203 exp.Lateral, 3204 this=this, 3205 view=view, 3206 outer=outer, 3207 alias=table_alias, 3208 cross_apply=cross_apply, 3209 ) 3210 3211 def _parse_join_parts( 3212 self, 3213 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3214 return ( 3215 self._match_set(self.JOIN_METHODS) and self._prev, 3216 self._match_set(self.JOIN_SIDES) and self._prev, 3217 self._match_set(self.JOIN_KINDS) and self._prev, 3218 ) 3219 3220 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3221 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3222 this = self._parse_column() 3223 if isinstance(this, exp.Column): 3224 return this.this 3225 return this 3226 3227 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3228 3229 def _parse_join( 3230 self, skip_join_token: bool = False, parse_bracket: bool = False 3231 ) -> t.Optional[exp.Join]: 3232 if self._match(TokenType.COMMA): 3233 return self.expression(exp.Join, this=self._parse_table()) 3234 3235 index = self._index 3236 method, side, kind = self._parse_join_parts() 3237 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3238 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3239 3240 if not skip_join_token and not join: 3241 self._retreat(index) 3242 kind = None 3243 method = None 3244 side = None 3245 3246 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3247 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3248 3249 if not skip_join_token and not join and not outer_apply and not cross_apply: 3250 return None 3251 3252 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3253 3254 if method: 3255 kwargs["method"] = method.text 3256 if side: 3257 kwargs["side"] = side.text 3258 if kind: 3259 kwargs["kind"] = kind.text 3260 if hint: 3261 kwargs["hint"] = hint 3262 3263 if self._match(TokenType.MATCH_CONDITION): 3264 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3265 3266 if self._match(TokenType.ON): 3267 kwargs["on"] = self._parse_assignment() 3268 elif 
self._match(TokenType.USING): 3269 kwargs["using"] = self._parse_using_identifiers() 3270 elif ( 3271 not (outer_apply or cross_apply) 3272 and not isinstance(kwargs["this"], exp.Unnest) 3273 and not (kind and kind.token_type == TokenType.CROSS) 3274 ): 3275 index = self._index 3276 joins: t.Optional[list] = list(self._parse_joins()) 3277 3278 if joins and self._match(TokenType.ON): 3279 kwargs["on"] = self._parse_assignment() 3280 elif joins and self._match(TokenType.USING): 3281 kwargs["using"] = self._parse_using_identifiers() 3282 else: 3283 joins = None 3284 self._retreat(index) 3285 3286 kwargs["this"].set("joins", joins if joins else None) 3287 3288 comments = [c for token in (method, side, kind) if token for c in token.comments] 3289 return self.expression(exp.Join, comments=comments, **kwargs) 3290 3291 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3292 this = self._parse_assignment() 3293 3294 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3295 return this 3296 3297 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3298 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3299 3300 return this 3301 3302 def _parse_index_params(self) -> exp.IndexParameters: 3303 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3304 3305 if self._match(TokenType.L_PAREN, advance=False): 3306 columns = self._parse_wrapped_csv(self._parse_with_operator) 3307 else: 3308 columns = None 3309 3310 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3311 partition_by = self._parse_partition_by() 3312 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3313 tablespace = ( 3314 self._parse_var(any_token=True) 3315 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3316 else None 3317 ) 3318 where = self._parse_where() 3319 3320 on = self._parse_field() if self._match(TokenType.ON) else None 3321 3322 return self.expression( 3323 exp.IndexParameters, 3324 using=using, 3325 columns=columns, 3326 include=include, 3327 partition_by=partition_by, 3328 where=where, 3329 with_storage=with_storage, 3330 tablespace=tablespace, 3331 on=on, 3332 ) 3333 3334 def _parse_index( 3335 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3336 ) -> t.Optional[exp.Index]: 3337 if index or anonymous: 3338 unique = None 3339 primary = None 3340 amp = None 3341 3342 self._match(TokenType.ON) 3343 self._match(TokenType.TABLE) # hive 3344 table = self._parse_table_parts(schema=True) 3345 else: 3346 unique = self._match(TokenType.UNIQUE) 3347 primary = self._match_text_seq("PRIMARY") 3348 amp = self._match_text_seq("AMP") 3349 3350 if not self._match(TokenType.INDEX): 3351 return None 3352 3353 index = self._parse_id_var() 3354 table = None 3355 3356 params = self._parse_index_params() 3357 3358 return self.expression( 3359 exp.Index, 3360 this=index, 3361 table=table, 3362 unique=unique, 3363 primary=primary, 3364 amp=amp, 3365 params=params, 3366 ) 3367 3368 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3369 hints: t.List[exp.Expression] = [] 3370 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3371 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3372 hints.append( 3373 self.expression( 3374 exp.WithTableHint, 3375 expressions=self._parse_csv( 3376 lambda: self._parse_function() or self._parse_var(any_token=True) 3377 ), 3378 ) 3379 ) 3380 
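# NOTE: descriptive comment, not part of this module. This branch covers
# T-SQL-style table hints, e.g. "SELECT * FROM t WITH (NOLOCK)", which become
# exp.WithTableHint; the MySQL index-hint forms documented at the URL below
# are handled by the else branch as exp.IndexTableHint.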
self._match_r_paren() 3381 else: 3382 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3383 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3384 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3385 3386 self._match_set((TokenType.INDEX, TokenType.KEY)) 3387 if self._match(TokenType.FOR): 3388 hint.set("target", self._advance_any() and self._prev.text.upper()) 3389 3390 hint.set("expressions", self._parse_wrapped_id_vars()) 3391 hints.append(hint) 3392 3393 return hints or None 3394 3395 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3396 return ( 3397 (not schema and self._parse_function(optional_parens=False)) 3398 or self._parse_id_var(any_token=False) 3399 or self._parse_string_as_identifier() 3400 or self._parse_placeholder() 3401 ) 3402 3403 def _parse_table_parts( 3404 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3405 ) -> exp.Table: 3406 catalog = None 3407 db = None 3408 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3409 3410 while self._match(TokenType.DOT): 3411 if catalog: 3412 # This allows nesting the table in arbitrarily many dot expressions if needed 3413 table = self.expression( 3414 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3415 ) 3416 else: 3417 catalog = db 3418 db = table 3419 # "" used for tsql FROM a..b case 3420 table = self._parse_table_part(schema=schema) or "" 3421 3422 if ( 3423 wildcard 3424 and self._is_connected() 3425 and (isinstance(table, exp.Identifier) or not table) 3426 and self._match(TokenType.STAR) 3427 ): 3428 if isinstance(table, exp.Identifier): 3429 table.args["this"] += "*" 3430 else: 3431 table = exp.Identifier(this="*") 3432 3433 # We bubble up comments from the Identifier to the Table 3434 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3435 3436 if is_db_reference: 3437 catalog = db 3438 db = table 3439 table = None 3440 3441 if not table and not is_db_reference: 3442 self.raise_error(f"Expected table name but got {self._curr}") 3443 if not db and is_db_reference: 3444 self.raise_error(f"Expected database name but got {self._curr}") 3445 3446 table = self.expression( 3447 exp.Table, 3448 comments=comments, 3449 this=table, 3450 db=db, 3451 catalog=catalog, 3452 ) 3453 3454 changes = self._parse_changes() 3455 if changes: 3456 table.set("changes", changes) 3457 3458 at_before = self._parse_historical_data() 3459 if at_before: 3460 table.set("when", at_before) 3461 3462 pivots = self._parse_pivots() 3463 if pivots: 3464 table.set("pivots", pivots) 3465 3466 return table 3467 3468 def _parse_table( 3469 self, 3470 schema: bool = False, 3471 joins: bool = False, 3472 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3473 parse_bracket: bool = False, 3474 is_db_reference: bool = False, 3475 parse_partition: bool = False, 3476 ) -> t.Optional[exp.Expression]: 3477 lateral = self._parse_lateral() 3478 if lateral: 3479 return lateral 3480 3481 unnest = self._parse_unnest() 3482 if unnest: 3483 return unnest 3484 3485 values = self._parse_derived_table_values() 3486 if values: 3487 return values 3488 3489 subquery = self._parse_select(table=True) 3490 if subquery: 3491 if not subquery.args.get("pivots"): 3492 subquery.set("pivots", self._parse_pivots()) 3493 return subquery 3494 3495 bracket = parse_bracket and self._parse_bracket(None) 3496 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3497 3498 rows_from = 
self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3499 self._parse_table 3500 ) 3501 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3502 3503 only = self._match(TokenType.ONLY) 3504 3505 this = t.cast( 3506 exp.Expression, 3507 bracket 3508 or rows_from 3509 or self._parse_bracket( 3510 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3511 ), 3512 ) 3513 3514 if only: 3515 this.set("only", only) 3516 3517 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3518 self._match_text_seq("*") 3519 3520 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3521 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3522 this.set("partition", self._parse_partition()) 3523 3524 if schema: 3525 return self._parse_schema(this=this) 3526 3527 version = self._parse_version() 3528 3529 if version: 3530 this.set("version", version) 3531 3532 if self.dialect.ALIAS_POST_TABLESAMPLE: 3533 table_sample = self._parse_table_sample() 3534 3535 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3536 if alias: 3537 this.set("alias", alias) 3538 3539 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3540 return self.expression( 3541 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3542 ) 3543 3544 this.set("hints", self._parse_table_hints()) 3545 3546 if not this.args.get("pivots"): 3547 this.set("pivots", self._parse_pivots()) 3548 3549 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3550 table_sample = self._parse_table_sample() 3551 3552 if table_sample: 3553 table_sample.set("this", this) 3554 this = table_sample 3555 3556 if joins: 3557 for join in self._parse_joins(): 3558 this.append("joins", join) 3559 3560 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3561 this.set("ordinality", True) 3562 this.set("alias", self._parse_table_alias()) 3563 3564 return this 3565 3566 def _parse_version(self) -> t.Optional[exp.Version]: 3567 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3568 this = "TIMESTAMP" 3569 elif self._match(TokenType.VERSION_SNAPSHOT): 3570 this = "VERSION" 3571 else: 3572 return None 3573 3574 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3575 kind = self._prev.text.upper() 3576 start = self._parse_bitwise() 3577 self._match_texts(("TO", "AND")) 3578 end = self._parse_bitwise() 3579 expression: t.Optional[exp.Expression] = self.expression( 3580 exp.Tuple, expressions=[start, end] 3581 ) 3582 elif self._match_text_seq("CONTAINED", "IN"): 3583 kind = "CONTAINED IN" 3584 expression = self.expression( 3585 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3586 ) 3587 elif self._match(TokenType.ALL): 3588 kind = "ALL" 3589 expression = None 3590 else: 3591 self._match_text_seq("AS", "OF") 3592 kind = "AS OF" 3593 expression = self._parse_type() 3594 3595 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3596 3597 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3598 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3599 index = self._index 3600 historical_data = None 3601 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3602 this = self._prev.text.upper() 3603 kind = ( 3604 self._match(TokenType.L_PAREN) 3605 and self._match_texts(self.HISTORICAL_DATA_KIND) 3606 and self._prev.text.upper() 3607 ) 3608 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3609 3610 
            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not
self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3717 size = num 3718 else: 3719 percent = num 3720 3721 if matched_l_paren: 3722 self._match_r_paren() 3723 3724 if self._match(TokenType.L_PAREN): 3725 method = self._parse_var(upper=True) 3726 seed = self._match(TokenType.COMMA) and self._parse_number() 3727 self._match_r_paren() 3728 elif self._match_texts(("SEED", "REPEATABLE")): 3729 seed = self._parse_wrapped(self._parse_number) 3730 3731 if not method and self.DEFAULT_SAMPLING_METHOD: 3732 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3733 3734 return self.expression( 3735 exp.TableSample, 3736 expressions=expressions, 3737 method=method, 3738 bucket_numerator=bucket_numerator, 3739 bucket_denominator=bucket_denominator, 3740 bucket_field=bucket_field, 3741 percent=percent, 3742 size=size, 3743 seed=seed, 3744 ) 3745 3746 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3747 return list(iter(self._parse_pivot, None)) or None 3748 3749 def _parse_joins(self) -> t.Iterator[exp.Join]: 3750 return iter(self._parse_join, None) 3751 3752 # https://duckdb.org/docs/sql/statements/pivot 3753 def _parse_simplified_pivot(self) -> exp.Pivot: 3754 def _parse_on() -> t.Optional[exp.Expression]: 3755 this = self._parse_bitwise() 3756 return self._parse_in(this) if self._match(TokenType.IN) else this 3757 3758 this = self._parse_table() 3759 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3760 using = self._match(TokenType.USING) and self._parse_csv( 3761 lambda: self._parse_alias(self._parse_function()) 3762 ) 3763 group = self._parse_group() 3764 return self.expression( 3765 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3766 ) 3767 3768 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3769 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3770 this = self._parse_select_or_expression() 3771 3772 self._match(TokenType.ALIAS) 3773 alias = self._parse_field() 3774 if alias: 3775 return self.expression(exp.PivotAlias, this=this, alias=alias) 3776 3777 return this 3778 3779 value = self._parse_column() 3780 3781 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3782 self.raise_error("Expecting IN (") 3783 3784 if self._match(TokenType.ANY): 3785 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3786 else: 3787 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3788 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3789 3790 self._match_r_paren() 3791 return expr 3792 3793 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3794 index = self._index 3795 include_nulls = None 3796 3797 if self._match(TokenType.PIVOT): 3798 unpivot = False 3799 elif self._match(TokenType.UNPIVOT): 3800 unpivot = True 3801 3802 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3803 if self._match_text_seq("INCLUDE", "NULLS"): 3804 include_nulls = True 3805 elif self._match_text_seq("EXCLUDE", "NULLS"): 3806 include_nulls = False 3807 else: 3808 return None 3809 3810 expressions = [] 3811 3812 if not self._match(TokenType.L_PAREN): 3813 self._retreat(index) 3814 return None 3815 3816 if unpivot: 3817 expressions = self._parse_csv(self._parse_column) 3818 else: 3819 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3820 3821 if not expressions: 3822 self.raise_error("Failed to parse PIVOT's aggregation list") 3823 3824 if not self._match(TokenType.FOR): 3825 self.raise_error("Expecting FOR") 3826 3827 
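        # Editorial sketch (not part of the source), assuming the Snowflake dialect: in
        # SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b')), the aggregation list parsed
        # above is [SUM(v)] and _parse_pivot_in below consumes k IN ('a', 'b'), e.g.
        #
        #   import sqlglot
        #   sql = "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))"
        #   sqlglot.parse_one(sql, read="snowflake").find(sqlglot.exp.Pivot)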
field = self._parse_pivot_in() 3828 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3829 self._parse_bitwise 3830 ) 3831 3832 self._match_r_paren() 3833 3834 pivot = self.expression( 3835 exp.Pivot, 3836 expressions=expressions, 3837 field=field, 3838 unpivot=unpivot, 3839 include_nulls=include_nulls, 3840 default_on_null=default_on_null, 3841 ) 3842 3843 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3844 pivot.set("alias", self._parse_table_alias()) 3845 3846 if not unpivot: 3847 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3848 3849 columns: t.List[exp.Expression] = [] 3850 for fld in pivot.args["field"].expressions: 3851 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3852 for name in names: 3853 if self.PREFIXED_PIVOT_COLUMNS: 3854 name = f"{name}_{field_name}" if name else field_name 3855 else: 3856 name = f"{field_name}_{name}" if name else field_name 3857 3858 columns.append(exp.to_identifier(name)) 3859 3860 pivot.set("columns", columns) 3861 3862 return pivot 3863 3864 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3865 return [agg.alias for agg in aggregations] 3866 3867 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3868 if not skip_where_token and not self._match(TokenType.PREWHERE): 3869 return None 3870 3871 return self.expression( 3872 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3873 ) 3874 3875 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3876 if not skip_where_token and not self._match(TokenType.WHERE): 3877 return None 3878 3879 return self.expression( 3880 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3881 ) 3882 3883 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3884 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3885 return None 3886 3887 elements: t.Dict[str, t.Any] = defaultdict(list) 3888 3889 if self._match(TokenType.ALL): 3890 elements["all"] = True 3891 elif self._match(TokenType.DISTINCT): 3892 elements["all"] = False 3893 3894 while True: 3895 expressions = self._parse_csv( 3896 lambda: None 3897 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3898 else self._parse_assignment() 3899 ) 3900 if expressions: 3901 elements["expressions"].extend(expressions) 3902 3903 grouping_sets = self._parse_grouping_sets() 3904 if grouping_sets: 3905 elements["grouping_sets"].extend(grouping_sets) 3906 3907 rollup = None 3908 cube = None 3909 totals = None 3910 3911 index = self._index 3912 with_ = self._match(TokenType.WITH) 3913 if self._match(TokenType.ROLLUP): 3914 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3915 elements["rollup"].extend(ensure_list(rollup)) 3916 3917 if self._match(TokenType.CUBE): 3918 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3919 elements["cube"].extend(ensure_list(cube)) 3920 3921 if self._match_text_seq("TOTALS"): 3922 totals = True 3923 elements["totals"] = True # type: ignore 3924 3925 if not (grouping_sets or rollup or cube or totals): 3926 if with_: 3927 self._retreat(index) 3928 break 3929 3930 return self.expression(exp.Group, **elements) # type: ignore 3931 3932 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3933 if not self._match(TokenType.GROUPING_SETS): 3934 return None 3935 3936 return 
self._parse_wrapped_csv(self._parse_grouping_set) 3937 3938 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3939 if self._match(TokenType.L_PAREN): 3940 grouping_set = self._parse_csv(self._parse_column) 3941 self._match_r_paren() 3942 return self.expression(exp.Tuple, expressions=grouping_set) 3943 3944 return self._parse_column() 3945 3946 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3947 if not skip_having_token and not self._match(TokenType.HAVING): 3948 return None 3949 return self.expression(exp.Having, this=self._parse_assignment()) 3950 3951 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3952 if not self._match(TokenType.QUALIFY): 3953 return None 3954 return self.expression(exp.Qualify, this=self._parse_assignment()) 3955 3956 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3957 if skip_start_token: 3958 start = None 3959 elif self._match(TokenType.START_WITH): 3960 start = self._parse_assignment() 3961 else: 3962 return None 3963 3964 self._match(TokenType.CONNECT_BY) 3965 nocycle = self._match_text_seq("NOCYCLE") 3966 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3967 exp.Prior, this=self._parse_bitwise() 3968 ) 3969 connect = self._parse_assignment() 3970 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3971 3972 if not start and self._match(TokenType.START_WITH): 3973 start = self._parse_assignment() 3974 3975 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3976 3977 def _parse_name_as_expression(self) -> exp.Alias: 3978 return self.expression( 3979 exp.Alias, 3980 alias=self._parse_id_var(any_token=True), 3981 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3982 ) 3983 3984 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3985 if self._match_text_seq("INTERPOLATE"): 3986 return self._parse_wrapped_csv(self._parse_name_as_expression) 3987 return None 3988 3989 def _parse_order( 3990 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3991 ) -> t.Optional[exp.Expression]: 3992 siblings = None 3993 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3994 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3995 return this 3996 3997 siblings = True 3998 3999 return self.expression( 4000 exp.Order, 4001 this=this, 4002 expressions=self._parse_csv(self._parse_ordered), 4003 interpolate=self._parse_interpolate(), 4004 siblings=siblings, 4005 ) 4006 4007 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4008 if not self._match(token): 4009 return None 4010 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4011 4012 def _parse_ordered( 4013 self, parse_method: t.Optional[t.Callable] = None 4014 ) -> t.Optional[exp.Ordered]: 4015 this = parse_method() if parse_method else self._parse_assignment() 4016 if not this: 4017 return None 4018 4019 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4020 this = exp.var("ALL") 4021 4022 asc = self._match(TokenType.ASC) 4023 desc = self._match(TokenType.DESC) or (asc and False) 4024 4025 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4026 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4027 4028 nulls_first = is_nulls_first or False 4029 explicitly_null_ordered = is_nulls_first or is_nulls_last 4030 4031 if ( 4032 not explicitly_null_ordered 4033 and ( 4034 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4035 or (desc 
and self.dialect.NULL_ORDERING != "nulls_are_small") 4036 ) 4037 and self.dialect.NULL_ORDERING != "nulls_are_last" 4038 ): 4039 nulls_first = True 4040 4041 if self._match_text_seq("WITH", "FILL"): 4042 with_fill = self.expression( 4043 exp.WithFill, 4044 **{ # type: ignore 4045 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4046 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4047 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4048 }, 4049 ) 4050 else: 4051 with_fill = None 4052 4053 return self.expression( 4054 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4055 ) 4056 4057 def _parse_limit( 4058 self, 4059 this: t.Optional[exp.Expression] = None, 4060 top: bool = False, 4061 skip_limit_token: bool = False, 4062 ) -> t.Optional[exp.Expression]: 4063 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4064 comments = self._prev_comments 4065 if top: 4066 limit_paren = self._match(TokenType.L_PAREN) 4067 expression = self._parse_term() if limit_paren else self._parse_number() 4068 4069 if limit_paren: 4070 self._match_r_paren() 4071 else: 4072 expression = self._parse_term() 4073 4074 if self._match(TokenType.COMMA): 4075 offset = expression 4076 expression = self._parse_term() 4077 else: 4078 offset = None 4079 4080 limit_exp = self.expression( 4081 exp.Limit, 4082 this=this, 4083 expression=expression, 4084 offset=offset, 4085 comments=comments, 4086 expressions=self._parse_limit_by(), 4087 ) 4088 4089 return limit_exp 4090 4091 if self._match(TokenType.FETCH): 4092 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4093 direction = self._prev.text.upper() if direction else "FIRST" 4094 4095 count = self._parse_field(tokens=self.FETCH_TOKENS) 4096 percent = self._match(TokenType.PERCENT) 4097 4098 self._match_set((TokenType.ROW, TokenType.ROWS)) 4099 4100 only = self._match_text_seq("ONLY") 4101 with_ties = self._match_text_seq("WITH", "TIES") 4102 4103 if only and with_ties: 4104 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4105 4106 return self.expression( 4107 exp.Fetch, 4108 direction=direction, 4109 count=count, 4110 percent=percent, 4111 with_ties=with_ties, 4112 ) 4113 4114 return this 4115 4116 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4117 if not self._match(TokenType.OFFSET): 4118 return this 4119 4120 count = self._parse_term() 4121 self._match_set((TokenType.ROW, TokenType.ROWS)) 4122 4123 return self.expression( 4124 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4125 ) 4126 4127 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4128 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4129 4130 def _parse_locks(self) -> t.List[exp.Lock]: 4131 locks = [] 4132 while True: 4133 if self._match_text_seq("FOR", "UPDATE"): 4134 update = True 4135 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4136 "LOCK", "IN", "SHARE", "MODE" 4137 ): 4138 update = False 4139 else: 4140 break 4141 4142 expressions = None 4143 if self._match_text_seq("OF"): 4144 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4145 4146 wait: t.Optional[bool | exp.Expression] = None 4147 if self._match_text_seq("NOWAIT"): 4148 wait = True 4149 elif self._match_text_seq("WAIT"): 4150 wait = self._parse_primary() 4151 elif self._match_text_seq("SKIP", "LOCKED"): 4152 wait = False 4153 4154 locks.append( 4155 
self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4156 ) 4157 4158 return locks 4159 4160 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4161 while this and self._match_set(self.SET_OPERATIONS): 4162 token_type = self._prev.token_type 4163 4164 if token_type == TokenType.UNION: 4165 operation: t.Type[exp.SetOperation] = exp.Union 4166 elif token_type == TokenType.EXCEPT: 4167 operation = exp.Except 4168 else: 4169 operation = exp.Intersect 4170 4171 comments = self._prev.comments 4172 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4173 by_name = self._match_text_seq("BY", "NAME") 4174 expression = self._parse_select(nested=True, parse_set_operation=False) 4175 4176 this = self.expression( 4177 operation, 4178 comments=comments, 4179 this=this, 4180 distinct=distinct, 4181 by_name=by_name, 4182 expression=expression, 4183 ) 4184 4185 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4186 expression = this.expression 4187 4188 if expression: 4189 for arg in self.SET_OP_MODIFIERS: 4190 expr = expression.args.get(arg) 4191 if expr: 4192 this.set(arg, expr.pop()) 4193 4194 return this 4195 4196 def _parse_expression(self) -> t.Optional[exp.Expression]: 4197 return self._parse_alias(self._parse_assignment()) 4198 4199 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4200 this = self._parse_disjunction() 4201 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4202 # This allows us to parse <non-identifier token> := <expr> 4203 this = exp.column( 4204 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4205 ) 4206 4207 while self._match_set(self.ASSIGNMENT): 4208 this = self.expression( 4209 self.ASSIGNMENT[self._prev.token_type], 4210 this=this, 4211 comments=self._prev_comments, 4212 expression=self._parse_assignment(), 4213 ) 4214 4215 return this 4216 4217 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4218 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4219 4220 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4221 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4222 4223 def _parse_equality(self) -> t.Optional[exp.Expression]: 4224 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4225 4226 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4227 return self._parse_tokens(self._parse_range, self.COMPARISON) 4228 4229 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4230 this = this or self._parse_bitwise() 4231 negate = self._match(TokenType.NOT) 4232 4233 if self._match_set(self.RANGE_PARSERS): 4234 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4235 if not expression: 4236 return this 4237 4238 this = expression 4239 elif self._match(TokenType.ISNULL): 4240 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4241 4242 # Postgres supports ISNULL and NOTNULL for conditions. 
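        # Editorial sketch (not part of the source), assuming the Postgres dialect:
        # "x ISNULL" parses to x IS NULL, and "x NOTNULL" (handled below) to NOT x IS NULL:
        #
        #   import sqlglot
        #   sqlglot.parse_one("SELECT 1 WHERE x NOTNULL", read="postgres").sql()
        #   # roughly 'SELECT 1 WHERE NOT x IS NULL'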
4243 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4244 if self._match(TokenType.NOTNULL): 4245 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4246 this = self.expression(exp.Not, this=this) 4247 4248 if negate: 4249 this = self._negate_range(this) 4250 4251 if self._match(TokenType.IS): 4252 this = self._parse_is(this) 4253 4254 return this 4255 4256 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4257 if not this: 4258 return this 4259 4260 return self.expression(exp.Not, this=this) 4261 4262 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4263 index = self._index - 1 4264 negate = self._match(TokenType.NOT) 4265 4266 if self._match_text_seq("DISTINCT", "FROM"): 4267 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4268 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4269 4270 expression = self._parse_null() or self._parse_boolean() 4271 if not expression: 4272 self._retreat(index) 4273 return None 4274 4275 this = self.expression(exp.Is, this=this, expression=expression) 4276 return self.expression(exp.Not, this=this) if negate else this 4277 4278 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4279 unnest = self._parse_unnest(with_alias=False) 4280 if unnest: 4281 this = self.expression(exp.In, this=this, unnest=unnest) 4282 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4283 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4284 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4285 4286 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4287 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4288 else: 4289 this = self.expression(exp.In, this=this, expressions=expressions) 4290 4291 if matched_l_paren: 4292 self._match_r_paren(this) 4293 elif not self._match(TokenType.R_BRACKET, expression=this): 4294 self.raise_error("Expecting ]") 4295 else: 4296 this = self.expression(exp.In, this=this, field=self._parse_field()) 4297 4298 return this 4299 4300 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4301 low = self._parse_bitwise() 4302 self._match(TokenType.AND) 4303 high = self._parse_bitwise() 4304 return self.expression(exp.Between, this=this, low=low, high=high) 4305 4306 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4307 if not self._match(TokenType.ESCAPE): 4308 return this 4309 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4310 4311 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4312 index = self._index 4313 4314 if not self._match(TokenType.INTERVAL) and match_interval: 4315 return None 4316 4317 if self._match(TokenType.STRING, advance=False): 4318 this = self._parse_primary() 4319 else: 4320 this = self._parse_term() 4321 4322 if not this or ( 4323 isinstance(this, exp.Column) 4324 and not this.table 4325 and not this.this.quoted 4326 and this.name.upper() == "IS" 4327 ): 4328 self._retreat(index) 4329 return None 4330 4331 unit = self._parse_function() or ( 4332 not self._match(TokenType.ALIAS, advance=False) 4333 and self._parse_var(any_token=True, upper=True) 4334 ) 4335 4336 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4337 # each INTERVAL expression into this canonical form so it's easy to 
transpile 4338 if this and this.is_number: 4339 this = exp.Literal.string(this.to_py()) 4340 elif this and this.is_string: 4341 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4342 if len(parts) == 1: 4343 if unit: 4344 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4345 self._retreat(self._index - 1) 4346 4347 this = exp.Literal.string(parts[0][0]) 4348 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4349 4350 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4351 unit = self.expression( 4352 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4353 ) 4354 4355 interval = self.expression(exp.Interval, this=this, unit=unit) 4356 4357 index = self._index 4358 self._match(TokenType.PLUS) 4359 4360 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4361 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4362 return self.expression( 4363 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4364 ) 4365 4366 self._retreat(index) 4367 return interval 4368 4369 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4370 this = self._parse_term() 4371 4372 while True: 4373 if self._match_set(self.BITWISE): 4374 this = self.expression( 4375 self.BITWISE[self._prev.token_type], 4376 this=this, 4377 expression=self._parse_term(), 4378 ) 4379 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4380 this = self.expression( 4381 exp.DPipe, 4382 this=this, 4383 expression=self._parse_term(), 4384 safe=not self.dialect.STRICT_STRING_CONCAT, 4385 ) 4386 elif self._match(TokenType.DQMARK): 4387 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4388 elif self._match_pair(TokenType.LT, TokenType.LT): 4389 this = self.expression( 4390 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4391 ) 4392 elif self._match_pair(TokenType.GT, TokenType.GT): 4393 this = self.expression( 4394 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4395 ) 4396 else: 4397 break 4398 4399 return this 4400 4401 def _parse_term(self) -> t.Optional[exp.Expression]: 4402 this = self._parse_factor() 4403 4404 while self._match_set(self.TERM): 4405 klass = self.TERM[self._prev.token_type] 4406 comments = self._prev_comments 4407 expression = self._parse_factor() 4408 4409 this = self.expression(klass, this=this, comments=comments, expression=expression) 4410 4411 if isinstance(this, exp.Collate): 4412 expr = this.expression 4413 4414 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4415 # fallback to Identifier / Var 4416 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4417 ident = expr.this 4418 if isinstance(ident, exp.Identifier): 4419 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4420 4421 return this 4422 4423 def _parse_factor(self) -> t.Optional[exp.Expression]: 4424 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4425 this = parse_method() 4426 4427 while self._match_set(self.FACTOR): 4428 klass = self.FACTOR[self._prev.token_type] 4429 comments = self._prev_comments 4430 expression = parse_method() 4431 4432 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4433 self._retreat(self._index - 1) 4434 return this 4435 4436 this = self.expression(klass, this=this, comments=comments, expression=expression) 4437 4438 if isinstance(this, exp.Div): 4439 this.args["typed"] = 
self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
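            # Editorial note (not part of the source): e.g. with Snowflake's TYPE_CONVERTERS,
            # a bare DECIMAL is expanded to DECIMAL(38, 0) while consuming a single token, so
            # index2 - index == 1, the branch below is skipped, and we retreat to re-parse the
            # token as a Column or Identifier instead.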
4493 if data_type.expressions and index2 - index > 1: 4494 self._retreat(index2) 4495 return self._parse_column_ops(data_type) 4496 4497 self._retreat(index) 4498 4499 if fallback_to_identifier: 4500 return self._parse_id_var() 4501 4502 this = self._parse_column() 4503 return this and self._parse_column_ops(this) 4504 4505 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4506 this = self._parse_type() 4507 if not this: 4508 return None 4509 4510 if isinstance(this, exp.Column) and not this.table: 4511 this = exp.var(this.name.upper()) 4512 4513 return self.expression( 4514 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4515 ) 4516 4517 def _parse_types( 4518 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4519 ) -> t.Optional[exp.Expression]: 4520 index = self._index 4521 4522 this: t.Optional[exp.Expression] = None 4523 prefix = self._match_text_seq("SYSUDTLIB", ".") 4524 4525 if not self._match_set(self.TYPE_TOKENS): 4526 identifier = allow_identifiers and self._parse_id_var( 4527 any_token=False, tokens=(TokenType.VAR,) 4528 ) 4529 if isinstance(identifier, exp.Identifier): 4530 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4531 4532 if len(tokens) != 1: 4533 self.raise_error("Unexpected identifier", self._prev) 4534 4535 if tokens[0].token_type in self.TYPE_TOKENS: 4536 self._prev = tokens[0] 4537 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4538 type_name = identifier.name 4539 4540 while self._match(TokenType.DOT): 4541 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4542 4543 this = exp.DataType.build(type_name, udt=True) 4544 else: 4545 self._retreat(self._index - 1) 4546 return None 4547 else: 4548 return None 4549 4550 type_token = self._prev.token_type 4551 4552 if type_token == TokenType.PSEUDO_TYPE: 4553 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4554 4555 if type_token == TokenType.OBJECT_IDENTIFIER: 4556 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4557 4558 # https://materialize.com/docs/sql/types/map/ 4559 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4560 key_type = self._parse_types( 4561 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4562 ) 4563 if not self._match(TokenType.FARROW): 4564 self._retreat(index) 4565 return None 4566 4567 value_type = self._parse_types( 4568 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4569 ) 4570 if not self._match(TokenType.R_BRACKET): 4571 self._retreat(index) 4572 return None 4573 4574 return exp.DataType( 4575 this=exp.DataType.Type.MAP, 4576 expressions=[key_type, value_type], 4577 nested=True, 4578 prefix=prefix, 4579 ) 4580 4581 nested = type_token in self.NESTED_TYPE_TOKENS 4582 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4583 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4584 expressions = None 4585 maybe_func = False 4586 4587 if self._match(TokenType.L_PAREN): 4588 if is_struct: 4589 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4590 elif nested: 4591 expressions = self._parse_csv( 4592 lambda: self._parse_types( 4593 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4594 ) 4595 ) 4596 elif type_token in self.ENUM_TYPE_TOKENS: 4597 expressions = self._parse_csv(self._parse_equality) 4598 elif is_aggregate: 4599 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4600 
any_token=False, tokens=(TokenType.VAR,) 4601 ) 4602 if not func_or_ident or not self._match(TokenType.COMMA): 4603 return None 4604 expressions = self._parse_csv( 4605 lambda: self._parse_types( 4606 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4607 ) 4608 ) 4609 expressions.insert(0, func_or_ident) 4610 else: 4611 expressions = self._parse_csv(self._parse_type_size) 4612 4613 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4614 if type_token == TokenType.VECTOR and len(expressions) == 2: 4615 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4616 4617 if not expressions or not self._match(TokenType.R_PAREN): 4618 self._retreat(index) 4619 return None 4620 4621 maybe_func = True 4622 4623 values: t.Optional[t.List[exp.Expression]] = None 4624 4625 if nested and self._match(TokenType.LT): 4626 if is_struct: 4627 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4628 else: 4629 expressions = self._parse_csv( 4630 lambda: self._parse_types( 4631 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4632 ) 4633 ) 4634 4635 if not self._match(TokenType.GT): 4636 self.raise_error("Expecting >") 4637 4638 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4639 values = self._parse_csv(self._parse_assignment) 4640 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4641 4642 if type_token in self.TIMESTAMPS: 4643 if self._match_text_seq("WITH", "TIME", "ZONE"): 4644 maybe_func = False 4645 tz_type = ( 4646 exp.DataType.Type.TIMETZ 4647 if type_token in self.TIMES 4648 else exp.DataType.Type.TIMESTAMPTZ 4649 ) 4650 this = exp.DataType(this=tz_type, expressions=expressions) 4651 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4652 maybe_func = False 4653 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4654 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4655 maybe_func = False 4656 elif type_token == TokenType.INTERVAL: 4657 unit = self._parse_var(upper=True) 4658 if unit: 4659 if self._match_text_seq("TO"): 4660 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4661 4662 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4663 else: 4664 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4665 4666 if maybe_func and check_func: 4667 index2 = self._index 4668 peek = self._parse_string() 4669 4670 if not peek: 4671 self._retreat(index) 4672 return None 4673 4674 self._retreat(index2) 4675 4676 if not this: 4677 if self._match_text_seq("UNSIGNED"): 4678 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4679 if not unsigned_type_token: 4680 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4681 4682 type_token = unsigned_type_token or type_token 4683 4684 this = exp.DataType( 4685 this=exp.DataType.Type[type_token.value], 4686 expressions=expressions, 4687 nested=nested, 4688 prefix=prefix, 4689 ) 4690 4691 # Empty arrays/structs are allowed 4692 if values is not None: 4693 cls = exp.Struct if is_struct else exp.Array 4694 this = exp.cast(cls(expressions=values), this, copy=False) 4695 4696 elif expressions: 4697 this.set("expressions", expressions) 4698 4699 # https://materialize.com/docs/sql/types/list/#type-name 4700 while self._match(TokenType.LIST): 4701 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4702 4703 index = self._index 4704 4705 # Postgres 
supports the INT ARRAY[3] syntax as a synonym for INT[3] 4706 matched_array = self._match(TokenType.ARRAY) 4707 4708 while self._curr: 4709 datatype_token = self._prev.token_type 4710 matched_l_bracket = self._match(TokenType.L_BRACKET) 4711 if not matched_l_bracket and not matched_array: 4712 break 4713 4714 matched_array = False 4715 values = self._parse_csv(self._parse_assignment) or None 4716 if ( 4717 values 4718 and not schema 4719 and ( 4720 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4721 ) 4722 ): 4723 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4724 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4725 self._retreat(index) 4726 break 4727 4728 this = exp.DataType( 4729 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4730 ) 4731 self._match(TokenType.R_BRACKET) 4732 4733 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4734 converter = self.TYPE_CONVERTERS.get(this.this) 4735 if converter: 4736 this = converter(t.cast(exp.DataType, this)) 4737 4738 return this 4739 4740 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4741 index = self._index 4742 4743 if ( 4744 self._curr 4745 and self._next 4746 and self._curr.token_type in self.TYPE_TOKENS 4747 and self._next.token_type in self.TYPE_TOKENS 4748 ): 4749 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4750 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4751 this = self._parse_id_var() 4752 else: 4753 this = ( 4754 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4755 or self._parse_id_var() 4756 ) 4757 4758 self._match(TokenType.COLON) 4759 4760 if ( 4761 type_required 4762 and not isinstance(this, exp.DataType) 4763 and not self._match_set(self.TYPE_TOKENS, advance=False) 4764 ): 4765 self._retreat(index) 4766 return self._parse_types() 4767 4768 return self._parse_column_def(this) 4769 4770 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4771 if not self._match_text_seq("AT", "TIME", "ZONE"): 4772 return this 4773 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4774 4775 def _parse_column(self) -> t.Optional[exp.Expression]: 4776 this = self._parse_column_reference() 4777 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4778 4779 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4780 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4781 4782 return column 4783 4784 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4785 this = self._parse_field() 4786 if ( 4787 not this 4788 and self._match(TokenType.VALUES, advance=False) 4789 and self.VALUES_FOLLOWED_BY_PAREN 4790 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4791 ): 4792 this = self._parse_id_var() 4793 4794 if isinstance(this, exp.Identifier): 4795 # We bubble up comments from the Identifier to the Column 4796 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4797 4798 return this 4799 4800 def _parse_colon_as_variant_extract( 4801 self, this: t.Optional[exp.Expression] 4802 ) -> t.Optional[exp.Expression]: 4803 casts = [] 4804 json_path = [] 4805 4806 while self._match(TokenType.COLON): 4807 start_index = self._index 4808 
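            # Editorial sketch (not part of the source), assuming the Snowflake dialect:
            # for col:a.b::INT the JSON path "a.b" is extracted first and the cast is
            # applied to the whole extraction, e.g.
            #
            #   import sqlglot
            #   ast = sqlglot.parse_one("SELECT col:a.b::INT FROM t", read="snowflake")
            #   ast.sql(dialect="snowflake")
            #   # roughly "SELECT CAST(GET_PATH(col, 'a.b') AS INT) FROM t"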
4809 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4810 path = self._parse_column_ops( 4811 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4812 ) 4813 4814 # The cast :: operator has a lower precedence than the extraction operator :, so 4815 # we rearrange the AST appropriately to avoid casting the JSON path 4816 while isinstance(path, exp.Cast): 4817 casts.append(path.to) 4818 path = path.this 4819 4820 if casts: 4821 dcolon_offset = next( 4822 i 4823 for i, t in enumerate(self._tokens[start_index:]) 4824 if t.token_type == TokenType.DCOLON 4825 ) 4826 end_token = self._tokens[start_index + dcolon_offset - 1] 4827 else: 4828 end_token = self._prev 4829 4830 if path: 4831 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4832 4833 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4834 # Databricks transforms it back to the colon/dot notation 4835 if json_path: 4836 this = self.expression( 4837 exp.JSONExtract, 4838 this=this, 4839 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4840 variant_extract=True, 4841 ) 4842 4843 while casts: 4844 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4845 4846 return this 4847 4848 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4849 return self._parse_types() 4850 4851 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4852 this = self._parse_bracket(this) 4853 4854 while self._match_set(self.COLUMN_OPERATORS): 4855 op_token = self._prev.token_type 4856 op = self.COLUMN_OPERATORS.get(op_token) 4857 4858 if op_token == TokenType.DCOLON: 4859 field = self._parse_dcolon() 4860 if not field: 4861 self.raise_error("Expected type") 4862 elif op and self._curr: 4863 field = self._parse_column_reference() 4864 else: 4865 field = self._parse_field(any_token=True, anonymous_func=True) 4866 4867 if isinstance(field, exp.Func) and this: 4868 # bigquery allows function calls like x.y.count(...) 4869 # SAFE.SUBSTR(...) 
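                # Editorial sketch (not part of the source), assuming the BigQuery dialect:
                # in SELECT a.b.SAFE.SUBSTR(x, 0, 2), the parts left of the function call are
                # folded into a chain of exp.Dot nodes by the rewrite below, e.g.
                #
                #   import sqlglot
                #   sqlglot.parse_one("SELECT SAFE.SUBSTR('foo', 0, 1)", read="bigquery")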
4870 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4871 this = exp.replace_tree( 4872 this, 4873 lambda n: ( 4874 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4875 if n.table 4876 else n.this 4877 ) 4878 if isinstance(n, exp.Column) 4879 else n, 4880 ) 4881 4882 if op: 4883 this = op(self, this, field) 4884 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4885 this = self.expression( 4886 exp.Column, 4887 this=field, 4888 table=this.this, 4889 db=this.args.get("table"), 4890 catalog=this.args.get("db"), 4891 ) 4892 else: 4893 this = self.expression(exp.Dot, this=this, expression=field) 4894 4895 this = self._parse_bracket(this) 4896 4897 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4898 4899 def _parse_primary(self) -> t.Optional[exp.Expression]: 4900 if self._match_set(self.PRIMARY_PARSERS): 4901 token_type = self._prev.token_type 4902 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4903 4904 if token_type == TokenType.STRING: 4905 expressions = [primary] 4906 while self._match(TokenType.STRING): 4907 expressions.append(exp.Literal.string(self._prev.text)) 4908 4909 if len(expressions) > 1: 4910 return self.expression(exp.Concat, expressions=expressions) 4911 4912 return primary 4913 4914 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4915 return exp.Literal.number(f"0.{self._prev.text}") 4916 4917 if self._match(TokenType.L_PAREN): 4918 comments = self._prev_comments 4919 query = self._parse_select() 4920 4921 if query: 4922 expressions = [query] 4923 else: 4924 expressions = self._parse_expressions() 4925 4926 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4927 4928 if not this and self._match(TokenType.R_PAREN, advance=False): 4929 this = self.expression(exp.Tuple) 4930 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4931 this = self._parse_subquery(this=this, parse_alias=False) 4932 elif isinstance(this, exp.Subquery): 4933 this = self._parse_subquery( 4934 this=self._parse_set_operations(this), parse_alias=False 4935 ) 4936 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4937 this = self.expression(exp.Tuple, expressions=expressions) 4938 else: 4939 this = self.expression(exp.Paren, this=this) 4940 4941 if this: 4942 this.add_comments(comments) 4943 4944 self._match_r_paren(expression=this) 4945 return this 4946 4947 return None 4948 4949 def _parse_field( 4950 self, 4951 any_token: bool = False, 4952 tokens: t.Optional[t.Collection[TokenType]] = None, 4953 anonymous_func: bool = False, 4954 ) -> t.Optional[exp.Expression]: 4955 if anonymous_func: 4956 field = ( 4957 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4958 or self._parse_primary() 4959 ) 4960 else: 4961 field = self._parse_primary() or self._parse_function( 4962 anonymous=anonymous_func, any_token=any_token 4963 ) 4964 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4965 4966 def _parse_function( 4967 self, 4968 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4969 anonymous: bool = False, 4970 optional_parens: bool = True, 4971 any_token: bool = False, 4972 ) -> t.Optional[exp.Expression]: 4973 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4974 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4975 fn_syntax = False 4976 if ( 4977 self._match(TokenType.L_BRACE, advance=False) 4978 and self._next 4979 and 
self._next.text.upper() == "FN" 4980 ): 4981 self._advance(2) 4982 fn_syntax = True 4983 4984 func = self._parse_function_call( 4985 functions=functions, 4986 anonymous=anonymous, 4987 optional_parens=optional_parens, 4988 any_token=any_token, 4989 ) 4990 4991 if fn_syntax: 4992 self._match(TokenType.R_BRACE) 4993 4994 return func 4995 4996 def _parse_function_call( 4997 self, 4998 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4999 anonymous: bool = False, 5000 optional_parens: bool = True, 5001 any_token: bool = False, 5002 ) -> t.Optional[exp.Expression]: 5003 if not self._curr: 5004 return None 5005 5006 comments = self._curr.comments 5007 token_type = self._curr.token_type 5008 this = self._curr.text 5009 upper = this.upper() 5010 5011 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5012 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5013 self._advance() 5014 return self._parse_window(parser(self)) 5015 5016 if not self._next or self._next.token_type != TokenType.L_PAREN: 5017 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5018 self._advance() 5019 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5020 5021 return None 5022 5023 if any_token: 5024 if token_type in self.RESERVED_TOKENS: 5025 return None 5026 elif token_type not in self.FUNC_TOKENS: 5027 return None 5028 5029 self._advance(2) 5030 5031 parser = self.FUNCTION_PARSERS.get(upper) 5032 if parser and not anonymous: 5033 this = parser(self) 5034 else: 5035 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5036 5037 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5038 this = self.expression(subquery_predicate, this=self._parse_select()) 5039 self._match_r_paren() 5040 return this 5041 5042 if functions is None: 5043 functions = self.FUNCTIONS 5044 5045 function = functions.get(upper) 5046 5047 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5048 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5049 5050 if alias: 5051 args = self._kv_to_prop_eq(args) 5052 5053 if function and not anonymous: 5054 if "dialect" in function.__code__.co_varnames: 5055 func = function(args, dialect=self.dialect) 5056 else: 5057 func = function(args) 5058 5059 func = self.validate_expression(func, args) 5060 if not self.dialect.NORMALIZE_FUNCTIONS: 5061 func.meta["name"] = this 5062 5063 this = func 5064 else: 5065 if token_type == TokenType.IDENTIFIER: 5066 this = exp.Identifier(this=this, quoted=True) 5067 this = self.expression(exp.Anonymous, this=this, expressions=args) 5068 5069 if isinstance(this, exp.Expression): 5070 this.add_comments(comments) 5071 5072 self._match_r_paren(this) 5073 return self._parse_window(this) 5074 5075 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5076 transformed = [] 5077 5078 for e in expressions: 5079 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5080 if isinstance(e, exp.Alias): 5081 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5082 5083 if not isinstance(e, exp.PropertyEQ): 5084 e = self.expression( 5085 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5086 ) 5087 5088 if isinstance(e.this, exp.Column): 5089 e.this.replace(e.this.this) 5090 5091 transformed.append(e) 5092 5093 return transformed 5094 5095 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5096 return self._parse_column_def(self._parse_id_var()) 5097 5098 def _parse_user_defined_function( 
5099 self, kind: t.Optional[TokenType] = None 5100 ) -> t.Optional[exp.Expression]: 5101 this = self._parse_id_var() 5102 5103 while self._match(TokenType.DOT): 5104 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5105 5106 if not self._match(TokenType.L_PAREN): 5107 return this 5108 5109 expressions = self._parse_csv(self._parse_function_parameter) 5110 self._match_r_paren() 5111 return self.expression( 5112 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5113 ) 5114 5115 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5116 literal = self._parse_primary() 5117 if literal: 5118 return self.expression(exp.Introducer, this=token.text, expression=literal) 5119 5120 return self.expression(exp.Identifier, this=token.text) 5121 5122 def _parse_session_parameter(self) -> exp.SessionParameter: 5123 kind = None 5124 this = self._parse_id_var() or self._parse_primary() 5125 5126 if this and self._match(TokenType.DOT): 5127 kind = this.name 5128 this = self._parse_var() or self._parse_primary() 5129 5130 return self.expression(exp.SessionParameter, this=this, kind=kind) 5131 5132 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5133 return self._parse_id_var() 5134 5135 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5136 index = self._index 5137 5138 if self._match(TokenType.L_PAREN): 5139 expressions = t.cast( 5140 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5141 ) 5142 5143 if not self._match(TokenType.R_PAREN): 5144 self._retreat(index) 5145 else: 5146 expressions = [self._parse_lambda_arg()] 5147 5148 if self._match_set(self.LAMBDAS): 5149 return self.LAMBDAS[self._prev.token_type](self, expressions) 5150 5151 self._retreat(index) 5152 5153 this: t.Optional[exp.Expression] 5154 5155 if self._match(TokenType.DISTINCT): 5156 this = self.expression( 5157 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5158 ) 5159 else: 5160 this = self._parse_select_or_expression(alias=alias) 5161 5162 return self._parse_limit( 5163 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5164 ) 5165 5166 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5167 index = self._index 5168 if not self._match(TokenType.L_PAREN): 5169 return this 5170 5171 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5172 # expr can be of both types 5173 if self._match_set(self.SELECT_START_TOKENS): 5174 self._retreat(index) 5175 return this 5176 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5177 self._match_r_paren() 5178 return self.expression(exp.Schema, this=this, expressions=args) 5179 5180 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5181 return self._parse_column_def(self._parse_field(any_token=True)) 5182 5183 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5184 # column defs are not really columns, they're identifiers 5185 if isinstance(this, exp.Column): 5186 this = this.this 5187 5188 kind = self._parse_types(schema=True) 5189 5190 if self._match_text_seq("FOR", "ORDINALITY"): 5191 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5192 5193 constraints: t.List[exp.Expression] = [] 5194 5195 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5196 ("ALIAS", "MATERIALIZED") 5197 ): 5198 persisted = self._prev.text.upper() == "MATERIALIZED" 5199 constraints.append( 5200 self.expression( 5201 exp.ComputedColumnConstraint, 5202 this=self._parse_assignment(), 5203 persisted=persisted or self._match_text_seq("PERSISTED"), 5204 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5205 ) 5206 ) 5207 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5208 self._match(TokenType.ALIAS) 5209 constraints.append( 5210 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5211 ) 5212 5213 while True: 5214 constraint = self._parse_column_constraint() 5215 if not constraint: 5216 break 5217 constraints.append(constraint) 5218 5219 if not kind and not constraints: 5220 return this 5221 5222 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5223 5224 def _parse_auto_increment( 5225 self, 5226 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5227 start = None 5228 increment = None 5229 5230 if self._match(TokenType.L_PAREN, advance=False): 5231 args = self._parse_wrapped_csv(self._parse_bitwise) 5232 start = seq_get(args, 0) 5233 increment = seq_get(args, 1) 5234 elif self._match_text_seq("START"): 5235 start = self._parse_bitwise() 5236 self._match_text_seq("INCREMENT") 5237 increment = self._parse_bitwise() 5238 5239 if start and increment: 5240 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5241 5242 return exp.AutoIncrementColumnConstraint() 5243 5244 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5245 if not self._match_text_seq("REFRESH"): 5246 self._retreat(self._index - 1) 5247 return None 5248 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5249 5250 def _parse_compress(self) -> exp.CompressColumnConstraint: 5251 if self._match(TokenType.L_PAREN, advance=False): 5252 return self.expression( 5253 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5254 ) 5255 5256 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5257 5258 def _parse_generated_as_identity( 5259 self, 5260 ) -> ( 5261 exp.GeneratedAsIdentityColumnConstraint 5262 | exp.ComputedColumnConstraint 5263 | exp.GeneratedAsRowColumnConstraint 5264 ): 5265 if self._match_text_seq("BY", "DEFAULT"): 5266 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5267 this = self.expression( 5268 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5269 ) 5270 else: 5271 self._match_text_seq("ALWAYS") 5272 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5273 5274 self._match(TokenType.ALIAS) 5275 5276 if self._match_text_seq("ROW"): 5277 start = self._match_text_seq("START") 5278 if not start: 5279 self._match(TokenType.END) 5280 hidden = self._match_text_seq("HIDDEN") 5281 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5282 5283 identity = self._match_text_seq("IDENTITY") 5284 5285 if self._match(TokenType.L_PAREN): 5286 if self._match(TokenType.START_WITH): 5287 this.set("start", self._parse_bitwise()) 5288 if self._match_text_seq("INCREMENT", "BY"): 5289 this.set("increment", self._parse_bitwise()) 5290 if self._match_text_seq("MINVALUE"): 5291 this.set("minvalue", self._parse_bitwise()) 5292 if self._match_text_seq("MAXVALUE"): 5293 this.set("maxvalue", self._parse_bitwise()) 5294 5295 if self._match_text_seq("CYCLE"): 5296 this.set("cycle", True) 5297 elif self._match_text_seq("NO", "CYCLE"): 5298 this.set("cycle", False) 5299 5300 if not identity: 5301 this.set("expression", self._parse_range()) 5302 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5303 args = self._parse_csv(self._parse_bitwise) 5304 this.set("start", seq_get(args, 0)) 5305 this.set("increment", seq_get(args, 1)) 5306 5307 self._match_r_paren() 5308 5309 return this 5310 5311 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5312 self._match_text_seq("LENGTH") 5313 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5314 5315 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5316 if self._match_text_seq("NULL"): 5317 return self.expression(exp.NotNullColumnConstraint) 5318 if self._match_text_seq("CASESPECIFIC"): 5319 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5320 if self._match_text_seq("FOR", "REPLICATION"): 5321 return self.expression(exp.NotForReplicationColumnConstraint) 5322 return None 5323 5324 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5325 if self._match(TokenType.CONSTRAINT): 5326 this = self._parse_id_var() 5327 else: 5328 this = None 5329 5330 if self._match_texts(self.CONSTRAINT_PARSERS): 5331 return self.expression( 5332 exp.ColumnConstraint, 5333 this=this, 5334 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5335 ) 5336 5337 return this 5338 5339 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5340 if not self._match(TokenType.CONSTRAINT): 5341 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5342 5343 return self.expression( 5344 exp.Constraint, 5345 this=self._parse_id_var(), 5346 expressions=self._parse_unnamed_constraints(), 5347 ) 5348 5349 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5350 constraints = [] 5351 while True: 5352 constraint = self._parse_unnamed_constraint() or self._parse_function() 5353 if not constraint: 5354 break 5355 constraints.append(constraint) 5356 5357 return constraints 5358 5359 def _parse_unnamed_constraint( 5360 self, constraints: t.Optional[t.Collection[str]] = None 5361 ) -> t.Optional[exp.Expression]: 5362 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5363 constraints or self.CONSTRAINT_PARSERS 5364 ): 5365 return None 5366 5367 constraint = self._prev.text.upper() 5368 if constraint not in self.CONSTRAINT_PARSERS: 5369 
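# callers may pass a narrower `constraints` collection (e.g. SCHEMA_UNNAMED_CONSTRAINTS),
# but a matched keyword is only usable if a parser is registered for it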
self.raise_error(f"No parser found for schema constraint {constraint}.") 5370 5371 return self.CONSTRAINT_PARSERS[constraint](self) 5372 5373 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5374 return self._parse_id_var(any_token=False) 5375 5376 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5377 self._match_text_seq("KEY") 5378 return self.expression( 5379 exp.UniqueColumnConstraint, 5380 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5381 this=self._parse_schema(self._parse_unique_key()), 5382 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5383 on_conflict=self._parse_on_conflict(), 5384 ) 5385 5386 def _parse_key_constraint_options(self) -> t.List[str]: 5387 options = [] 5388 while True: 5389 if not self._curr: 5390 break 5391 5392 if self._match(TokenType.ON): 5393 action = None 5394 on = self._advance_any() and self._prev.text 5395 5396 if self._match_text_seq("NO", "ACTION"): 5397 action = "NO ACTION" 5398 elif self._match_text_seq("CASCADE"): 5399 action = "CASCADE" 5400 elif self._match_text_seq("RESTRICT"): 5401 action = "RESTRICT" 5402 elif self._match_pair(TokenType.SET, TokenType.NULL): 5403 action = "SET NULL" 5404 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5405 action = "SET DEFAULT" 5406 else: 5407 self.raise_error("Invalid key constraint") 5408 5409 options.append(f"ON {on} {action}") 5410 else: 5411 var = self._parse_var_from_options( 5412 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5413 ) 5414 if not var: 5415 break 5416 options.append(var.name) 5417 5418 return options 5419 5420 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5421 if match and not self._match(TokenType.REFERENCES): 5422 return None 5423 5424 expressions = None 5425 this = self._parse_table(schema=True) 5426 options = self._parse_key_constraint_options() 5427 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5428 5429 def _parse_foreign_key(self) -> exp.ForeignKey: 5430 expressions = self._parse_wrapped_id_vars() 5431 reference = self._parse_references() 5432 options = {} 5433 5434 while self._match(TokenType.ON): 5435 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5436 self.raise_error("Expected DELETE or UPDATE") 5437 5438 kind = self._prev.text.lower() 5439 5440 if self._match_text_seq("NO", "ACTION"): 5441 action = "NO ACTION" 5442 elif self._match(TokenType.SET): 5443 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5444 action = "SET " + self._prev.text.upper() 5445 else: 5446 self._advance() 5447 action = self._prev.text.upper() 5448 5449 options[kind] = action 5450 5451 return self.expression( 5452 exp.ForeignKey, 5453 expressions=expressions, 5454 reference=reference, 5455 **options, # type: ignore 5456 ) 5457 5458 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5459 return self._parse_field() 5460 5461 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5462 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5463 self._retreat(self._index - 1) 5464 return None 5465 5466 id_vars = self._parse_wrapped_id_vars() 5467 return self.expression( 5468 exp.PeriodForSystemTimeConstraint, 5469 this=seq_get(id_vars, 0), 5470 expression=seq_get(id_vars, 1), 5471 ) 5472 5473 def _parse_primary_key( 5474 self, wrapped_optional: bool = False, in_props: bool = False 5475 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5476 desc = ( 5477 self._match_set((TokenType.ASC, 
TokenType.DESC)) 5478 and self._prev.token_type == TokenType.DESC 5479 ) 5480 5481 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5482 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5483 5484 expressions = self._parse_wrapped_csv( 5485 self._parse_primary_key_part, optional=wrapped_optional 5486 ) 5487 options = self._parse_key_constraint_options() 5488 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5489 5490 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5491 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5492 5493 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5494 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5495 return this 5496 5497 bracket_kind = self._prev.token_type 5498 expressions = self._parse_csv( 5499 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5500 ) 5501 5502 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5503 self.raise_error("Expected ]") 5504 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5505 self.raise_error("Expected }") 5506 5507 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5508 if bracket_kind == TokenType.L_BRACE: 5509 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5510 elif not this: 5511 this = build_array_constructor( 5512 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5513 ) 5514 else: 5515 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5516 if constructor_type: 5517 return build_array_constructor( 5518 constructor_type, 5519 args=expressions, 5520 bracket_kind=bracket_kind, 5521 dialect=self.dialect, 5522 ) 5523 5524 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5525 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5526 5527 self._add_comments(this) 5528 return self._parse_bracket(this) 5529 5530 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5531 if self._match(TokenType.COLON): 5532 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5533 return this 5534 5535 def _parse_case(self) -> t.Optional[exp.Expression]: 5536 ifs = [] 5537 default = None 5538 5539 comments = self._prev_comments 5540 expression = self._parse_assignment() 5541 5542 while self._match(TokenType.WHEN): 5543 this = self._parse_assignment() 5544 self._match(TokenType.THEN) 5545 then = self._parse_assignment() 5546 ifs.append(self.expression(exp.If, this=this, true=then)) 5547 5548 if self._match(TokenType.ELSE): 5549 default = self._parse_assignment() 5550 5551 if not self._match(TokenType.END): 5552 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5553 default = exp.column("interval") 5554 else: 5555 self.raise_error("Expected END after CASE", self._prev) 5556 5557 return self.expression( 5558 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5559 ) 5560 5561 def _parse_if(self) -> t.Optional[exp.Expression]: 5562 if self._match(TokenType.L_PAREN): 5563 args = self._parse_csv(self._parse_assignment) 5564 this = self.validate_expression(exp.If.from_arg_list(args), args) 5565 self._match_r_paren() 5566 else: 5567 index = self._index - 1 5568 5569 if self.NO_PAREN_IF_COMMANDS and index == 
0: 5570 return self._parse_as_command(self._prev) 5571 5572 condition = self._parse_assignment() 5573 5574 if not condition: 5575 self._retreat(index) 5576 return None 5577 5578 self._match(TokenType.THEN) 5579 true = self._parse_assignment() 5580 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5581 self._match(TokenType.END) 5582 this = self.expression(exp.If, this=condition, true=true, false=false) 5583 5584 return this 5585 5586 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5587 if not self._match_text_seq("VALUE", "FOR"): 5588 self._retreat(self._index - 1) 5589 return None 5590 5591 return self.expression( 5592 exp.NextValueFor, 5593 this=self._parse_column(), 5594 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5595 ) 5596 5597 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5598 this = self._parse_function() or self._parse_var_or_string(upper=True) 5599 5600 if self._match(TokenType.FROM): 5601 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5602 5603 if not self._match(TokenType.COMMA): 5604 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5605 5606 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5607 5608 def _parse_gap_fill(self) -> exp.GapFill: 5609 self._match(TokenType.TABLE) 5610 this = self._parse_table() 5611 5612 self._match(TokenType.COMMA) 5613 args = [this, *self._parse_csv(self._parse_lambda)] 5614 5615 gap_fill = exp.GapFill.from_arg_list(args) 5616 return self.validate_expression(gap_fill, args) 5617 5618 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5619 this = self._parse_assignment() 5620 5621 if not self._match(TokenType.ALIAS): 5622 if self._match(TokenType.COMMA): 5623 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5624 5625 self.raise_error("Expected AS after CAST") 5626 5627 fmt = None 5628 to = self._parse_types() 5629 5630 if self._match(TokenType.FORMAT): 5631 fmt_string = self._parse_string() 5632 fmt = self._parse_at_time_zone(fmt_string) 5633 5634 if not to: 5635 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5636 if to.this in exp.DataType.TEMPORAL_TYPES: 5637 this = self.expression( 5638 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5639 this=this, 5640 format=exp.Literal.string( 5641 format_time( 5642 fmt_string.this if fmt_string else "", 5643 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5644 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5645 ) 5646 ), 5647 safe=safe, 5648 ) 5649 5650 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5651 this.set("zone", fmt.args["zone"]) 5652 return this 5653 elif not to: 5654 self.raise_error("Expected TYPE after CAST") 5655 elif isinstance(to, exp.Identifier): 5656 to = exp.DataType.build(to.name, udt=True) 5657 elif to.this == exp.DataType.Type.CHAR: 5658 if self._match(TokenType.CHARACTER_SET): 5659 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5660 5661 return self.expression( 5662 exp.Cast if strict else exp.TryCast, 5663 this=this, 5664 to=to, 5665 format=fmt, 5666 safe=safe, 5667 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5668 ) 5669 5670 def _parse_string_agg(self) -> exp.Expression: 5671 if self._match(TokenType.DISTINCT): 5672 args: t.List[t.Optional[exp.Expression]] = [ 5673 self.expression(exp.Distinct, 
expressions=[self._parse_assignment()]) 5674 ] 5675 if self._match(TokenType.COMMA): 5676 args.extend(self._parse_csv(self._parse_assignment)) 5677 else: 5678 args = self._parse_csv(self._parse_assignment) # type: ignore 5679 5680 index = self._index 5681 if not self._match(TokenType.R_PAREN) and args: 5682 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5683 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5684 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5685 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5686 5687 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5688 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5689 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5690 if not self._match_text_seq("WITHIN", "GROUP"): 5691 self._retreat(index) 5692 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5693 5694 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5695 order = self._parse_order(this=seq_get(args, 0)) 5696 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5697 5698 def _parse_convert( 5699 self, strict: bool, safe: t.Optional[bool] = None 5700 ) -> t.Optional[exp.Expression]: 5701 this = self._parse_bitwise() 5702 5703 if self._match(TokenType.USING): 5704 to: t.Optional[exp.Expression] = self.expression( 5705 exp.CharacterSet, this=self._parse_var() 5706 ) 5707 elif self._match(TokenType.COMMA): 5708 to = self._parse_types() 5709 else: 5710 to = None 5711 5712 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5713 5714 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5715 """ 5716 There are generally two variants of the DECODE function: 5717 5718 - DECODE(bin, charset) 5719 - DECODE(expression, search, result [, search, result] ... [, default]) 5720 5721 The second variant will always be parsed into a CASE expression. Note that NULL 5722 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5723 instead of relying on pattern matching. 
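For example, DECODE(x, 1, 'one', 'other') is parsed as
CASE WHEN x = 1 THEN 'one' ELSE 'other' END, while a non-literal search
value y is matched NULL-safely, i.e. x = y OR (x IS NULL AND y IS NULL).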
5724 """ 5725 args = self._parse_csv(self._parse_assignment) 5726 5727 if len(args) < 3: 5728 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5729 5730 expression, *expressions = args 5731 if not expression: 5732 return None 5733 5734 ifs = [] 5735 for search, result in zip(expressions[::2], expressions[1::2]): 5736 if not search or not result: 5737 return None 5738 5739 if isinstance(search, exp.Literal): 5740 ifs.append( 5741 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5742 ) 5743 elif isinstance(search, exp.Null): 5744 ifs.append( 5745 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5746 ) 5747 else: 5748 cond = exp.or_( 5749 exp.EQ(this=expression.copy(), expression=search), 5750 exp.and_( 5751 exp.Is(this=expression.copy(), expression=exp.Null()), 5752 exp.Is(this=search.copy(), expression=exp.Null()), 5753 copy=False, 5754 ), 5755 copy=False, 5756 ) 5757 ifs.append(exp.If(this=cond, true=result)) 5758 5759 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5760 5761 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5762 self._match_text_seq("KEY") 5763 key = self._parse_column() 5764 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5765 self._match_text_seq("VALUE") 5766 value = self._parse_bitwise() 5767 5768 if not key and not value: 5769 return None 5770 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5771 5772 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5773 if not this or not self._match_text_seq("FORMAT", "JSON"): 5774 return this 5775 5776 return self.expression(exp.FormatJson, this=this) 5777 5778 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5779 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5780 for value in values: 5781 if self._match_text_seq(value, "ON", on): 5782 return f"{value} ON {on}" 5783 5784 return None 5785 5786 @t.overload 5787 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5788 5789 @t.overload 5790 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5791 5792 def _parse_json_object(self, agg=False): 5793 star = self._parse_star() 5794 expressions = ( 5795 [star] 5796 if star 5797 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5798 ) 5799 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5800 5801 unique_keys = None 5802 if self._match_text_seq("WITH", "UNIQUE"): 5803 unique_keys = True 5804 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5805 unique_keys = False 5806 5807 self._match_text_seq("KEYS") 5808 5809 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5810 self._parse_type() 5811 ) 5812 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5813 5814 return self.expression( 5815 exp.JSONObjectAgg if agg else exp.JSONObject, 5816 expressions=expressions, 5817 null_handling=null_handling, 5818 unique_keys=unique_keys, 5819 return_type=return_type, 5820 encoding=encoding, 5821 ) 5822 5823 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5824 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5825 if not self._match_text_seq("NESTED"): 5826 this = self._parse_id_var() 5827 kind = self._parse_types(allow_identifiers=False) 5828 nested = None 5829 else: 5830 this = None 5831 kind = None 5832 nested = True 5833 5834 path = self._match_text_seq("PATH") and self._parse_string() 5835 nested_schema = nested and self._parse_json_schema() 5836 5837 return self.expression( 5838 exp.JSONColumnDef, 5839 this=this, 5840 kind=kind, 5841 path=path, 5842 nested_schema=nested_schema, 5843 ) 5844 5845 def _parse_json_schema(self) -> exp.JSONSchema: 5846 self._match_text_seq("COLUMNS") 5847 return self.expression( 5848 exp.JSONSchema, 5849 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5850 ) 5851 5852 def _parse_json_table(self) -> exp.JSONTable: 5853 this = self._parse_format_json(self._parse_bitwise()) 5854 path = self._match(TokenType.COMMA) and self._parse_string() 5855 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5856 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5857 schema = self._parse_json_schema() 5858 5859 return exp.JSONTable( 5860 this=this, 5861 schema=schema, 5862 path=path, 5863 error_handling=error_handling, 5864 empty_handling=empty_handling, 5865 ) 5866 5867 def _parse_match_against(self) -> exp.MatchAgainst: 5868 expressions = self._parse_csv(self._parse_column) 5869 5870 self._match_text_seq(")", "AGAINST", "(") 5871 5872 this = self._parse_string() 5873 5874 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5875 modifier = "IN NATURAL LANGUAGE MODE" 5876 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5877 modifier = f"{modifier} WITH QUERY EXPANSION" 5878 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5879 modifier = "IN BOOLEAN MODE" 5880 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5881 modifier = "WITH QUERY EXPANSION" 5882 else: 5883 modifier = None 5884 5885 return self.expression( 5886 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5887 ) 5888 5889 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5890 def _parse_open_json(self) -> exp.OpenJSON: 5891 this = self._parse_bitwise() 5892 path = self._match(TokenType.COMMA) and self._parse_string() 5893 5894 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5895 this = self._parse_field(any_token=True) 5896 kind = self._parse_types() 5897 path = 
self._parse_string() 5898 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5899 5900 return self.expression( 5901 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5902 ) 5903 5904 expressions = None 5905 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5906 self._match_l_paren() 5907 expressions = self._parse_csv(_parse_open_json_column_def) 5908 5909 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5910 5911 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5912 args = self._parse_csv(self._parse_bitwise) 5913 5914 if self._match(TokenType.IN): 5915 return self.expression( 5916 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5917 ) 5918 5919 if haystack_first: 5920 haystack = seq_get(args, 0) 5921 needle = seq_get(args, 1) 5922 else: 5923 needle = seq_get(args, 0) 5924 haystack = seq_get(args, 1) 5925 5926 return self.expression( 5927 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5928 ) 5929 5930 def _parse_predict(self) -> exp.Predict: 5931 self._match_text_seq("MODEL") 5932 this = self._parse_table() 5933 5934 self._match(TokenType.COMMA) 5935 self._match_text_seq("TABLE") 5936 5937 return self.expression( 5938 exp.Predict, 5939 this=this, 5940 expression=self._parse_table(), 5941 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5942 ) 5943 5944 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5945 args = self._parse_csv(self._parse_table) 5946 return exp.JoinHint(this=func_name.upper(), expressions=args) 5947 5948 def _parse_substring(self) -> exp.Substring: 5949 # Postgres supports the form: substring(string [from int] [for int]) 5950 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5951 5952 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5953 5954 if self._match(TokenType.FROM): 5955 args.append(self._parse_bitwise()) 5956 if self._match(TokenType.FOR): 5957 if len(args) == 1: 5958 args.append(exp.Literal.number(1)) 5959 args.append(self._parse_bitwise()) 5960 5961 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5962 5963 def _parse_trim(self) -> exp.Trim: 5964 # https://www.w3resource.com/sql/character-functions/trim.php 5965 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5966 5967 position = None 5968 collation = None 5969 expression = None 5970 5971 if self._match_texts(self.TRIM_TYPES): 5972 position = self._prev.text.upper() 5973 5974 this = self._parse_bitwise() 5975 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5976 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5977 expression = self._parse_bitwise() 5978 5979 if invert_order: 5980 this, expression = expression, this 5981 5982 if self._match(TokenType.COLLATE): 5983 collation = self._parse_bitwise() 5984 5985 return self.expression( 5986 exp.Trim, this=this, position=position, expression=expression, collation=collation 5987 ) 5988 5989 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5990 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5991 5992 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5993 return self._parse_window(self._parse_id_var(), alias=True) 5994 5995 def _parse_respect_or_ignore_nulls( 5996 self, this: t.Optional[exp.Expression] 5997 ) -> t.Optional[exp.Expression]: 5998 if self._match_text_seq("IGNORE", "NULLS"): 
5999 return self.expression(exp.IgnoreNulls, this=this) 6000 if self._match_text_seq("RESPECT", "NULLS"): 6001 return self.expression(exp.RespectNulls, this=this) 6002 return this 6003 6004 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6005 if self._match(TokenType.HAVING): 6006 self._match_texts(("MAX", "MIN")) 6007 max = self._prev.text.upper() != "MIN" 6008 return self.expression( 6009 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6010 ) 6011 6012 return this 6013 6014 def _parse_window( 6015 self, this: t.Optional[exp.Expression], alias: bool = False 6016 ) -> t.Optional[exp.Expression]: 6017 func = this 6018 comments = func.comments if isinstance(func, exp.Expression) else None 6019 6020 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6021 self._match(TokenType.WHERE) 6022 this = self.expression( 6023 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6024 ) 6025 self._match_r_paren() 6026 6027 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6028 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6029 if self._match_text_seq("WITHIN", "GROUP"): 6030 order = self._parse_wrapped(self._parse_order) 6031 this = self.expression(exp.WithinGroup, this=this, expression=order) 6032 6033 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6034 # Some dialects choose to implement and some do not. 6035 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6036 6037 # There is some code above in _parse_lambda that handles 6038 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6039 6040 # The below changes handle 6041 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6042 6043 # Oracle allows both formats 6044 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6045 # and Snowflake chose to do the same for familiarity 6046 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6047 if isinstance(this, exp.AggFunc): 6048 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6049 6050 if ignore_respect and ignore_respect is not this: 6051 ignore_respect.replace(ignore_respect.this) 6052 this = self.expression(ignore_respect.__class__, this=this) 6053 6054 this = self._parse_respect_or_ignore_nulls(this) 6055 6056 # bigquery select from window x AS (partition by ...) 
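# e.g. SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y ORDER BY z)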
6057 if alias: 6058 over = None 6059 self._match(TokenType.ALIAS) 6060 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6061 return this 6062 else: 6063 over = self._prev.text.upper() 6064 6065 if comments and isinstance(func, exp.Expression): 6066 func.pop_comments() 6067 6068 if not self._match(TokenType.L_PAREN): 6069 return self.expression( 6070 exp.Window, 6071 comments=comments, 6072 this=this, 6073 alias=self._parse_id_var(False), 6074 over=over, 6075 ) 6076 6077 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6078 6079 first = self._match(TokenType.FIRST) 6080 if self._match_text_seq("LAST"): 6081 first = False 6082 6083 partition, order = self._parse_partition_and_order() 6084 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6085 6086 if kind: 6087 self._match(TokenType.BETWEEN) 6088 start = self._parse_window_spec() 6089 self._match(TokenType.AND) 6090 end = self._parse_window_spec() 6091 6092 spec = self.expression( 6093 exp.WindowSpec, 6094 kind=kind, 6095 start=start["value"], 6096 start_side=start["side"], 6097 end=end["value"], 6098 end_side=end["side"], 6099 ) 6100 else: 6101 spec = None 6102 6103 self._match_r_paren() 6104 6105 window = self.expression( 6106 exp.Window, 6107 comments=comments, 6108 this=this, 6109 partition_by=partition, 6110 order=order, 6111 spec=spec, 6112 alias=window_alias, 6113 over=over, 6114 first=first, 6115 ) 6116 6117 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6118 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6119 return self._parse_window(window, alias=alias) 6120 6121 return window 6122 6123 def _parse_partition_and_order( 6124 self, 6125 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6126 return self._parse_partition_by(), self._parse_order() 6127 6128 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6129 self._match(TokenType.BETWEEN) 6130 6131 return { 6132 "value": ( 6133 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6134 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6135 or self._parse_bitwise() 6136 ), 6137 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6138 } 6139 6140 def _parse_alias( 6141 self, this: t.Optional[exp.Expression], explicit: bool = False 6142 ) -> t.Optional[exp.Expression]: 6143 any_token = self._match(TokenType.ALIAS) 6144 comments = self._prev_comments or [] 6145 6146 if explicit and not any_token: 6147 return this 6148 6149 if self._match(TokenType.L_PAREN): 6150 aliases = self.expression( 6151 exp.Aliases, 6152 comments=comments, 6153 this=this, 6154 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6155 ) 6156 self._match_r_paren(aliases) 6157 return aliases 6158 6159 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6160 self.STRING_ALIASES and self._parse_string_as_identifier() 6161 ) 6162 6163 if alias: 6164 comments.extend(alias.pop_comments()) 6165 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6166 column = this.this 6167 6168 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6169 if not this.comments and column and column.comments: 6170 this.comments = column.pop_comments() 6171 6172 return this 6173 6174 def _parse_id_var( 6175 self, 6176 any_token: bool = True, 6177 tokens: t.Optional[t.Collection[TokenType]] = None, 6178 ) -> t.Optional[exp.Expression]: 6179 expression = self._parse_identifier() 6180 if 
not expression and ( 6181 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6182 ): 6183 quoted = self._prev.token_type == TokenType.STRING 6184 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6185 6186 return expression 6187 6188 def _parse_string(self) -> t.Optional[exp.Expression]: 6189 if self._match_set(self.STRING_PARSERS): 6190 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6191 return self._parse_placeholder() 6192 6193 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6194 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6195 6196 def _parse_number(self) -> t.Optional[exp.Expression]: 6197 if self._match_set(self.NUMERIC_PARSERS): 6198 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6199 return self._parse_placeholder() 6200 6201 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6202 if self._match(TokenType.IDENTIFIER): 6203 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6204 return self._parse_placeholder() 6205 6206 def _parse_var( 6207 self, 6208 any_token: bool = False, 6209 tokens: t.Optional[t.Collection[TokenType]] = None, 6210 upper: bool = False, 6211 ) -> t.Optional[exp.Expression]: 6212 if ( 6213 (any_token and self._advance_any()) 6214 or self._match(TokenType.VAR) 6215 or (self._match_set(tokens) if tokens else False) 6216 ): 6217 return self.expression( 6218 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6219 ) 6220 return self._parse_placeholder() 6221 6222 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6223 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6224 self._advance() 6225 return self._prev 6226 return None 6227 6228 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6229 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6230 6231 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6232 return self._parse_primary() or self._parse_var(any_token=True) 6233 6234 def _parse_null(self) -> t.Optional[exp.Expression]: 6235 if self._match_set(self.NULL_TOKENS): 6236 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6237 return self._parse_placeholder() 6238 6239 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6240 if self._match(TokenType.TRUE): 6241 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6242 if self._match(TokenType.FALSE): 6243 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6244 return self._parse_placeholder() 6245 6246 def _parse_star(self) -> t.Optional[exp.Expression]: 6247 if self._match(TokenType.STAR): 6248 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6249 return self._parse_placeholder() 6250 6251 def _parse_parameter(self) -> exp.Parameter: 6252 this = self._parse_identifier() or self._parse_primary_or_var() 6253 return self.expression(exp.Parameter, this=this) 6254 6255 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6256 if self._match_set(self.PLACEHOLDER_PARSERS): 6257 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6258 if placeholder: 6259 return placeholder 6260 self._advance(-1) 6261 return None 6262 6263 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6264 if not self._match_texts(keywords): 6265 return None 6266 if self._match(TokenType.L_PAREN, 
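# peek only (advance=False): the paren is left for _parse_wrapped_csv to consume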
advance=False): 6267 return self._parse_wrapped_csv(self._parse_expression) 6268 6269 expression = self._parse_expression() 6270 return [expression] if expression else None 6271 6272 def _parse_csv( 6273 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6274 ) -> t.List[exp.Expression]: 6275 parse_result = parse_method() 6276 items = [parse_result] if parse_result is not None else [] 6277 6278 while self._match(sep): 6279 self._add_comments(parse_result) 6280 parse_result = parse_method() 6281 if parse_result is not None: 6282 items.append(parse_result) 6283 6284 return items 6285 6286 def _parse_tokens( 6287 self, parse_method: t.Callable, expressions: t.Dict 6288 ) -> t.Optional[exp.Expression]: 6289 this = parse_method() 6290 6291 while self._match_set(expressions): 6292 this = self.expression( 6293 expressions[self._prev.token_type], 6294 this=this, 6295 comments=self._prev_comments, 6296 expression=parse_method(), 6297 ) 6298 6299 return this 6300 6301 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6302 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6303 6304 def _parse_wrapped_csv( 6305 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6306 ) -> t.List[exp.Expression]: 6307 return self._parse_wrapped( 6308 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6309 ) 6310 6311 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6312 wrapped = self._match(TokenType.L_PAREN) 6313 if not wrapped and not optional: 6314 self.raise_error("Expecting (") 6315 parse_result = parse_method() 6316 if wrapped: 6317 self._match_r_paren() 6318 return parse_result 6319 6320 def _parse_expressions(self) -> t.List[exp.Expression]: 6321 return self._parse_csv(self._parse_expression) 6322 6323 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6324 return self._parse_select() or self._parse_set_operations( 6325 self._parse_expression() if alias else self._parse_assignment() 6326 ) 6327 6328 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6329 return self._parse_query_modifiers( 6330 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6331 ) 6332 6333 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6334 this = None 6335 if self._match_texts(self.TRANSACTION_KIND): 6336 this = self._prev.text 6337 6338 self._match_texts(("TRANSACTION", "WORK")) 6339 6340 modes = [] 6341 while True: 6342 mode = [] 6343 while self._match(TokenType.VAR): 6344 mode.append(self._prev.text) 6345 6346 if mode: 6347 modes.append(" ".join(mode)) 6348 if not self._match(TokenType.COMMA): 6349 break 6350 6351 return self.expression(exp.Transaction, this=this, modes=modes) 6352 6353 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6354 chain = None 6355 savepoint = None 6356 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6357 6358 self._match_texts(("TRANSACTION", "WORK")) 6359 6360 if self._match_text_seq("TO"): 6361 self._match_text_seq("SAVEPOINT") 6362 savepoint = self._parse_id_var() 6363 6364 if self._match(TokenType.AND): 6365 chain = not self._match_text_seq("NO") 6366 self._match_text_seq("CHAIN") 6367 6368 if is_rollback: 6369 return self.expression(exp.Rollback, savepoint=savepoint) 6370 6371 return self.expression(exp.Commit, chain=chain) 6372 6373 def _parse_refresh(self) -> exp.Refresh: 6374 self._match(TokenType.TABLE) 6375 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6376 6377 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6378 if not self._match_text_seq("ADD"): 6379 return None 6380 6381 self._match(TokenType.COLUMN) 6382 exists_column = self._parse_exists(not_=True) 6383 expression = self._parse_field_def() 6384 6385 if expression: 6386 expression.set("exists", exists_column) 6387 6388 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6389 if self._match_texts(("FIRST", "AFTER")): 6390 position = self._prev.text 6391 column_position = self.expression( 6392 exp.ColumnPosition, this=self._parse_column(), position=position 6393 ) 6394 expression.set("position", column_position) 6395 6396 return expression 6397 6398 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6399 drop = self._match(TokenType.DROP) and self._parse_drop() 6400 if drop and not isinstance(drop, exp.Command): 6401 drop.set("kind", drop.args.get("kind", "COLUMN")) 6402 return drop 6403 6404 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6405 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6406 return self.expression( 6407 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6408 ) 6409 6410 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6411 index = self._index - 1 6412 6413 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6414 return self._parse_csv( 6415 lambda: self.expression( 6416 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6417 ) 6418 ) 6419 6420 self._retreat(index) 6421 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6422 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6423 6424 if self._match_text_seq("ADD", "COLUMNS"): 6425 schema = self._parse_schema() 6426 if schema: 6427 return [schema] 6428 return [] 6429 6430 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6431 6432 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6433 if self._match_texts(self.ALTER_ALTER_PARSERS): 6434 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6435 6436 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6437 # keyword after ALTER we default to parsing this statement 6438 self._match(TokenType.COLUMN) 6439 column = self._parse_field(any_token=True) 6440 6441 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6442 return self.expression(exp.AlterColumn, this=column, drop=True) 6443 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6444 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6445 if self._match(TokenType.COMMENT): 6446 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6447 if self._match_text_seq("DROP", "NOT", "NULL"): 6448 return self.expression( 6449 exp.AlterColumn, 6450 this=column, 6451 drop=True, 6452 allow_null=True, 6453 ) 6454 if self._match_text_seq("SET", "NOT", "NULL"): 6455 return self.expression( 6456 exp.AlterColumn, 6457 this=column, 6458 allow_null=False, 6459 ) 6460 self._match_text_seq("SET", "DATA") 6461 self._match_text_seq("TYPE") 6462 return self.expression( 6463 exp.AlterColumn, 6464 this=column, 6465 dtype=self._parse_types(), 6466 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6467 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6468 ) 6469 6470 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6471 if self._match_texts(("ALL", "EVEN", "AUTO")): 6472 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6473 6474 self._match_text_seq("KEY", "DISTKEY") 6475 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6476 6477 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6478 if compound: 6479 self._match_text_seq("SORTKEY") 6480 6481 if self._match(TokenType.L_PAREN, advance=False): 6482 return self.expression( 6483 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6484 ) 6485 6486 self._match_texts(("AUTO", "NONE")) 6487 return self.expression( 6488 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6489 ) 6490 6491 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6492 index = self._index - 1 6493 6494 partition_exists = self._parse_exists() 6495 if self._match(TokenType.PARTITION, advance=False): 6496 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6497 6498 self._retreat(index) 6499 return self._parse_csv(self._parse_drop_column) 6500 6501 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6502 if self._match(TokenType.COLUMN): 6503 exists = self._parse_exists() 6504 old_column = self._parse_column() 6505 to = self._match_text_seq("TO") 6506 new_column = self._parse_column() 6507 6508 if old_column is None or to is None or new_column is None: 6509 return None 6510 6511 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6512 6513 self._match_text_seq("TO") 6514 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6515 6516 def _parse_alter_table_set(self) -> exp.AlterSet: 6517 alter_set = self.expression(exp.AlterSet) 6518 6519 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6520 "TABLE", "PROPERTIES" 6521 ): 6522 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6523 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6524 alter_set.set("expressions", [self._parse_assignment()]) 6525 elif self._match_texts(("LOGGED", "UNLOGGED")): 6526 alter_set.set("option", exp.var(self._prev.text.upper())) 6527 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6528 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6529 elif self._match_text_seq("LOCATION"): 6530 alter_set.set("location", self._parse_field()) 6531 elif self._match_text_seq("ACCESS", "METHOD"): 6532 alter_set.set("access_method", self._parse_field()) 6533 elif self._match_text_seq("TABLESPACE"): 6534 alter_set.set("tablespace", self._parse_field()) 6535 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6536 alter_set.set("file_format", [self._parse_field()]) 6537 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6538 alter_set.set("file_format", self._parse_wrapped_options()) 6539 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6540 alter_set.set("copy_options", self._parse_wrapped_options()) 6541 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6542 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6543 else: 6544 if self._match_text_seq("SERDE"): 6545 alter_set.set("serde", self._parse_field()) 6546 6547 alter_set.set("expressions", [self._parse_properties()]) 6548 6549 return 
alter_set 6550 6551 def _parse_alter(self) -> exp.Alter | exp.Command: 6552 start = self._prev 6553 6554 alter_token = self._match_set(self.ALTERABLES) and self._prev 6555 if not alter_token: 6556 return self._parse_as_command(start) 6557 6558 exists = self._parse_exists() 6559 only = self._match_text_seq("ONLY") 6560 this = self._parse_table(schema=True) 6561 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6562 6563 if self._next: 6564 self._advance() 6565 6566 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6567 if parser: 6568 actions = ensure_list(parser(self)) 6569 options = self._parse_csv(self._parse_property) 6570 6571 if not self._curr and actions: 6572 return self.expression( 6573 exp.Alter, 6574 this=this, 6575 kind=alter_token.text.upper(), 6576 exists=exists, 6577 actions=actions, 6578 only=only, 6579 options=options, 6580 cluster=cluster, 6581 ) 6582 6583 return self._parse_as_command(start) 6584 6585 def _parse_merge(self) -> exp.Merge: 6586 self._match(TokenType.INTO) 6587 target = self._parse_table() 6588 6589 if target and self._match(TokenType.ALIAS, advance=False): 6590 target.set("alias", self._parse_table_alias()) 6591 6592 self._match(TokenType.USING) 6593 using = self._parse_table() 6594 6595 self._match(TokenType.ON) 6596 on = self._parse_assignment() 6597 6598 return self.expression( 6599 exp.Merge, 6600 this=target, 6601 using=using, 6602 on=on, 6603 expressions=self._parse_when_matched(), 6604 ) 6605 6606 def _parse_when_matched(self) -> t.List[exp.When]: 6607 whens = [] 6608 6609 while self._match(TokenType.WHEN): 6610 matched = not self._match(TokenType.NOT) 6611 self._match_text_seq("MATCHED") 6612 source = ( 6613 False 6614 if self._match_text_seq("BY", "TARGET") 6615 else self._match_text_seq("BY", "SOURCE") 6616 ) 6617 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6618 6619 self._match(TokenType.THEN) 6620 6621 if self._match(TokenType.INSERT): 6622 _this = self._parse_star() 6623 if _this: 6624 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6625 else: 6626 then = self.expression( 6627 exp.Insert, 6628 this=self._parse_value(), 6629 expression=self._match_text_seq("VALUES") and self._parse_value(), 6630 ) 6631 elif self._match(TokenType.UPDATE): 6632 expressions = self._parse_star() 6633 if expressions: 6634 then = self.expression(exp.Update, expressions=expressions) 6635 else: 6636 then = self.expression( 6637 exp.Update, 6638 expressions=self._match(TokenType.SET) 6639 and self._parse_csv(self._parse_equality), 6640 ) 6641 elif self._match(TokenType.DELETE): 6642 then = self.expression(exp.Var, this=self._prev.text) 6643 else: 6644 then = None 6645 6646 whens.append( 6647 self.expression( 6648 exp.When, 6649 matched=matched, 6650 source=source, 6651 condition=condition, 6652 then=then, 6653 ) 6654 ) 6655 return whens 6656 6657 def _parse_show(self) -> t.Optional[exp.Expression]: 6658 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6659 if parser: 6660 return parser(self) 6661 return self._parse_as_command(self._prev) 6662 6663 def _parse_set_item_assignment( 6664 self, kind: t.Optional[str] = None 6665 ) -> t.Optional[exp.Expression]: 6666 index = self._index 6667 6668 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6669 return self._parse_set_transaction(global_=kind == "GLOBAL") 6670 6671 left = self._parse_primary() or self._parse_column() 6672 assignment_delimiter = self._match_texts(("=", 
"TO")) 6673 6674 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6675 self._retreat(index) 6676 return None 6677 6678 right = self._parse_statement() or self._parse_id_var() 6679 if isinstance(right, (exp.Column, exp.Identifier)): 6680 right = exp.var(right.name) 6681 6682 this = self.expression(exp.EQ, this=left, expression=right) 6683 return self.expression(exp.SetItem, this=this, kind=kind) 6684 6685 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6686 self._match_text_seq("TRANSACTION") 6687 characteristics = self._parse_csv( 6688 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6689 ) 6690 return self.expression( 6691 exp.SetItem, 6692 expressions=characteristics, 6693 kind="TRANSACTION", 6694 **{"global": global_}, # type: ignore 6695 ) 6696 6697 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6698 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6699 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6700 6701 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6702 index = self._index 6703 set_ = self.expression( 6704 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6705 ) 6706 6707 if self._curr: 6708 self._retreat(index) 6709 return self._parse_as_command(self._prev) 6710 6711 return set_ 6712 6713 def _parse_var_from_options( 6714 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6715 ) -> t.Optional[exp.Var]: 6716 start = self._curr 6717 if not start: 6718 return None 6719 6720 option = start.text.upper() 6721 continuations = options.get(option) 6722 6723 index = self._index 6724 self._advance() 6725 for keywords in continuations or []: 6726 if isinstance(keywords, str): 6727 keywords = (keywords,) 6728 6729 if self._match_text_seq(*keywords): 6730 option = f"{option} {' '.join(keywords)}" 6731 break 6732 else: 6733 if continuations or continuations is None: 6734 if raise_unmatched: 6735 self.raise_error(f"Unknown option {option}") 6736 6737 self._retreat(index) 6738 return None 6739 6740 return exp.var(option) 6741 6742 def _parse_as_command(self, start: Token) -> exp.Command: 6743 while self._curr: 6744 self._advance() 6745 text = self._find_sql(start, self._prev) 6746 size = len(start.text) 6747 self._warn_unsupported() 6748 return exp.Command(this=text[:size], expression=text[size:]) 6749 6750 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6751 settings = [] 6752 6753 self._match_l_paren() 6754 kind = self._parse_id_var() 6755 6756 if self._match(TokenType.L_PAREN): 6757 while True: 6758 key = self._parse_id_var() 6759 value = self._parse_primary() 6760 6761 if not key and value is None: 6762 break 6763 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6764 self._match(TokenType.R_PAREN) 6765 6766 self._match_r_paren() 6767 6768 return self.expression( 6769 exp.DictProperty, 6770 this=this, 6771 kind=kind.this if kind else None, 6772 settings=settings, 6773 ) 6774 6775 def _parse_dict_range(self, this: str) -> exp.DictRange: 6776 self._match_l_paren() 6777 has_min = self._match_text_seq("MIN") 6778 if has_min: 6779 min = self._parse_var() or self._parse_primary() 6780 self._match_text_seq("MAX") 6781 max = self._parse_var() or self._parse_primary() 6782 else: 6783 max = self._parse_var() or self._parse_primary() 6784 min = exp.Literal.number(0) 6785 self._match_r_paren() 6786 return self.expression(exp.DictRange, this=this, 
min=min, max=max) 6787 6788 def _parse_comprehension( 6789 self, this: t.Optional[exp.Expression] 6790 ) -> t.Optional[exp.Comprehension]: 6791 index = self._index 6792 expression = self._parse_column() 6793 if not self._match(TokenType.IN): 6794 self._retreat(index - 1) 6795 return None 6796 iterator = self._parse_column() 6797 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6798 return self.expression( 6799 exp.Comprehension, 6800 this=this, 6801 expression=expression, 6802 iterator=iterator, 6803 condition=condition, 6804 ) 6805 6806 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6807 if self._match(TokenType.HEREDOC_STRING): 6808 return self.expression(exp.Heredoc, this=self._prev.text) 6809 6810 if not self._match_text_seq("$"): 6811 return None 6812 6813 tags = ["$"] 6814 tag_text = None 6815 6816 if self._is_connected(): 6817 self._advance() 6818 tags.append(self._prev.text.upper()) 6819 else: 6820 self.raise_error("No closing $ found") 6821 6822 if tags[-1] != "$": 6823 if self._is_connected() and self._match_text_seq("$"): 6824 tag_text = tags[-1] 6825 tags.append("$") 6826 else: 6827 self.raise_error("No closing $ found") 6828 6829 heredoc_start = self._curr 6830 6831 while self._curr: 6832 if self._match_text_seq(*tags, advance=False): 6833 this = self._find_sql(heredoc_start, self._prev) 6834 self._advance(len(tags)) 6835 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6836 6837 self._advance() 6838 6839 self.raise_error(f"No closing {''.join(tags)} found") 6840 return None 6841 6842 def _find_parser( 6843 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6844 ) -> t.Optional[t.Callable]: 6845 if not self._curr: 6846 return None 6847 6848 index = self._index 6849 this = [] 6850 while True: 6851 # The current token might be multiple words 6852 curr = self._curr.text.upper() 6853 key = curr.split(" ") 6854 this.append(curr) 6855 6856 self._advance() 6857 result, trie = in_trie(trie, key) 6858 if result == TrieResult.FAILED: 6859 break 6860 6861 if result == TrieResult.EXISTS: 6862 subparser = parsers[" ".join(this)] 6863 return subparser 6864 6865 self._retreat(index) 6866 return None 6867 6868 def _match(self, token_type, advance=True, expression=None): 6869 if not self._curr: 6870 return None 6871 6872 if self._curr.token_type == token_type: 6873 if advance: 6874 self._advance() 6875 self._add_comments(expression) 6876 return True 6877 6878 return None 6879 6880 def _match_set(self, types, advance=True): 6881 if not self._curr: 6882 return None 6883 6884 if self._curr.token_type in types: 6885 if advance: 6886 self._advance() 6887 return True 6888 6889 return None 6890 6891 def _match_pair(self, token_type_a, token_type_b, advance=True): 6892 if not self._curr or not self._next: 6893 return None 6894 6895 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6896 if advance: 6897 self._advance(2) 6898 return True 6899 6900 return None 6901 6902 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6903 if not self._match(TokenType.L_PAREN, expression=expression): 6904 self.raise_error("Expecting (") 6905 6906 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6907 if not self._match(TokenType.R_PAREN, expression=expression): 6908 self.raise_error("Expecting )") 6909 6910 def _match_texts(self, texts, advance=True): 6911 if self._curr and self._curr.text.upper() in texts: 6912 if advance: 6913 self._advance() 6914 return True 6915 return None 
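# A rough illustration of the matcher contract (hypothetical token stream):
# with NOT NULL as the next two tokens, _match_text_seq("NOT", "NULL")
# consumes both and returns True, whereas _match_text_seq("NOT", "DEFERRABLE")
# consumes nothing (the index is retreated) and returns None.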
6916 6917 def _match_text_seq(self, *texts, advance=True): 6918 index = self._index 6919 for text in texts: 6920 if self._curr and self._curr.text.upper() == text: 6921 self._advance() 6922 else: 6923 self._retreat(index) 6924 return None 6925 6926 if not advance: 6927 self._retreat(index) 6928 6929 return True 6930 6931 def _replace_lambda( 6932 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6933 ) -> t.Optional[exp.Expression]: 6934 if not node: 6935 return node 6936 6937 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6938 6939 for column in node.find_all(exp.Column): 6940 typ = lambda_types.get(column.parts[0].name) 6941 if typ is not None: 6942 dot_or_id = column.to_dot() if column.table else column.this 6943 6944 if typ: 6945 dot_or_id = self.expression( 6946 exp.Cast, 6947 this=dot_or_id, 6948 to=typ, 6949 ) 6950 6951 parent = column.parent 6952 6953 while isinstance(parent, exp.Dot): 6954 if not isinstance(parent.parent, exp.Dot): 6955 parent.replace(dot_or_id) 6956 break 6957 parent = parent.parent 6958 else: 6959 if column is node: 6960 node = dot_or_id 6961 else: 6962 column.replace(dot_or_id) 6963 return node 6964 6965 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6966 start = self._prev 6967 6968 # Not to be confused with TRUNCATE(number, decimals) function call 6969 if self._match(TokenType.L_PAREN): 6970 self._retreat(self._index - 2) 6971 return self._parse_function() 6972 6973 # Clickhouse supports TRUNCATE DATABASE as well 6974 is_database = self._match(TokenType.DATABASE) 6975 6976 self._match(TokenType.TABLE) 6977 6978 exists = self._parse_exists(not_=False) 6979 6980 expressions = self._parse_csv( 6981 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6982 ) 6983 6984 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6985 6986 if self._match_text_seq("RESTART", "IDENTITY"): 6987 identity = "RESTART" 6988 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6989 identity = "CONTINUE" 6990 else: 6991 identity = None 6992 6993 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6994 option = self._prev.text 6995 else: 6996 option = None 6997 6998 partition = self._parse_partition() 6999 7000 # Fallback case 7001 if self._curr: 7002 return self._parse_as_command(start) 7003 7004 return self.expression( 7005 exp.TruncateTable, 7006 expressions=expressions, 7007 is_database=is_database, 7008 exists=exists, 7009 cluster=cluster, 7010 identity=identity, 7011 option=option, 7012 partition=partition, 7013 ) 7014 7015 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7016 this = self._parse_ordered(self._parse_opclass) 7017 7018 if not self._match(TokenType.WITH): 7019 return this 7020 7021 op = self._parse_var(any_token=True) 7022 7023 return self.expression(exp.WithOperator, this=this, op=op) 7024 7025 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7026 self._match(TokenType.EQ) 7027 self._match(TokenType.L_PAREN) 7028 7029 opts: t.List[t.Optional[exp.Expression]] = [] 7030 while self._curr and not self._match(TokenType.R_PAREN): 7031 if self._match_text_seq("FORMAT_NAME", "="): 7032 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7033 # so we parse it separately to use _parse_field() 7034 prop = self.expression( 7035 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7036 ) 7037 opts.append(prop) 7038 else: 7039 opts.append(self._parse_property()) 7040 
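# separators between options are optional, so both space- and comma-separated option lists parse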
7041 self._match(TokenType.COMMA) 7042 7043 return opts 7044 7045 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7046 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7047 7048 options = [] 7049 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7050 option = self._parse_var(any_token=True) 7051 prev = self._prev.text.upper() 7052 7053 # Different dialects might separate options and values by white space, "=" and "AS" 7054 self._match(TokenType.EQ) 7055 self._match(TokenType.ALIAS) 7056 7057 param = self.expression(exp.CopyParameter, this=option) 7058 7059 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7060 TokenType.L_PAREN, advance=False 7061 ): 7062 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7063 param.set("expressions", self._parse_wrapped_options()) 7064 elif prev == "FILE_FORMAT": 7065 # T-SQL's external file format case 7066 param.set("expression", self._parse_field()) 7067 else: 7068 param.set("expression", self._parse_unquoted_field()) 7069 7070 options.append(param) 7071 self._match(sep) 7072 7073 return options 7074 7075 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7076 expr = self.expression(exp.Credentials) 7077 7078 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7079 expr.set("storage", self._parse_field()) 7080 if self._match_text_seq("CREDENTIALS"): 7081 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7082 creds = ( 7083 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7084 ) 7085 expr.set("credentials", creds) 7086 if self._match_text_seq("ENCRYPTION"): 7087 expr.set("encryption", self._parse_wrapped_options()) 7088 if self._match_text_seq("IAM_ROLE"): 7089 expr.set("iam_role", self._parse_field()) 7090 if self._match_text_seq("REGION"): 7091 expr.set("region", self._parse_field()) 7092 7093 return expr 7094 7095 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7096 return self._parse_field() 7097 7098 def _parse_copy(self) -> exp.Copy | exp.Command: 7099 start = self._prev 7100 7101 self._match(TokenType.INTO) 7102 7103 this = ( 7104 self._parse_select(nested=True, parse_subquery_alias=False) 7105 if self._match(TokenType.L_PAREN, advance=False) 7106 else self._parse_table(schema=True) 7107 ) 7108 7109 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7110 7111 files = self._parse_csv(self._parse_file_location) 7112 credentials = self._parse_credentials() 7113 7114 self._match_text_seq("WITH") 7115 7116 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7117 7118 # Fallback case 7119 if self._curr: 7120 return self._parse_as_command(start) 7121 7122 return self.expression( 7123 exp.Copy, 7124 this=this, 7125 kind=kind, 7126 credentials=credentials, 7127 files=files, 7128 params=params, 7129 )
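
Example of how a parseable COPY statement surfaces as exp.Copy (a minimal sketch, assuming the default dialect accepts this simplified form; anything the parser cannot fully consume falls back to exp.Command via _parse_as_command):

    import sqlglot
    from sqlglot import exp

    stmt = sqlglot.parse_one("COPY INTO t FROM 'data.csv'")

    if isinstance(stmt, exp.Copy):
        # kind is True for COPY ... FROM and False for COPY ... TO
        print(stmt.args["kind"])      # True
        print(stmt.args["files"][0])  # 'data.csv'
    else:
        # Unsupported variants are preserved verbatim as commands
        assert isinstance(stmt, exp.Command)
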
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
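
A quick way to see the key/value pairing is to call the builder directly (a sketch; in normal use the parser reaches it through the "VAR_MAP" entry in Parser.FUNCTIONS):

    from sqlglot import exp
    from sqlglot.parser import build_var_map

    # A flat list of alternating keys and values...
    node = build_var_map(
        [exp.Literal.string("a"), exp.Literal.number(1),
         exp.Literal.string("b"), exp.Literal.number(2)]
    )

    # ...is split into two parallel arrays
    assert isinstance(node, exp.VarMap)
    print(node.args["keys"])    # ARRAY('a', 'b')
    print(node.args["values"])  # ARRAY(1, 2)
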
44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
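
For example, Parser.RANGE_PARSERS registers binary_range_parser(exp.Like) for TokenType.LIKE, and the trailing _parse_escape call is what absorbs an ESCAPE clause. A minimal sketch, assuming the default dialect:

    import sqlglot
    from sqlglot import exp

    node = sqlglot.parse_one("x LIKE 'a!%' ESCAPE '!'")

    # The LIKE itself parses to exp.Like; the ESCAPE clause wraps it
    assert isinstance(node, exp.Escape)
    assert isinstance(node.this, exp.Like)
    print(node.expression)  # '!'
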
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
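
Example of the dialect-dependent argument order (a direct call; it assumes the default dialect, where LOG_BASE_FIRST leaves the base-first order untouched):

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect
    from sqlglot.parser import build_logarithm

    dialect = Dialect.get_or_raise(None)  # the default dialect

    log = build_logarithm([exp.Literal.number(2), exp.column("x")], dialect)
    assert isinstance(log, exp.Log)
    print(log.this)        # 2 (the base, since the args were not swapped)
    print(log.expression)  # x
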
88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder
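
Example, assuming the default dialect, where "JSON_EXTRACT" is wired to this builder in Parser.FUNCTIONS (the exact path representation comes from dialect.to_json_path):

    import sqlglot
    from sqlglot import exp

    expr = sqlglot.parse_one("SELECT JSON_EXTRACT(payload, '$.user.id')")
    node = expr.selects[0]

    assert isinstance(node, exp.JSONExtract)
    print(node.this)        # payload
    print(node.expression)  # the parsed '$.user.id' path, not a raw string
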
101def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression)
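
Example of why the exp.Paren wrapping matters once MOD is rendered with the % operator (default dialect assumed):

    import sqlglot

    # Without the exp.Paren wrapper this would render as a + 1 % 7,
    # which groups as a + (1 % 7) and changes the result.
    print(sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql())
    # SELECT (a + 1) % 7
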
121def build_array_constructor( 122 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 123) -> exp.Expression: 124 array_exp = exp_class(expressions=args) 125 126 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 127 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 128 129 return array_exp
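
Example (a direct call with illustrative arguments; the parser normally supplies them while consuming an ARRAY[...] or LIST(...) constructor). With the default dialect, HAS_DISTINCT_ARRAY_CONSTRUCTORS is off, so no bracket_notation flag is recorded:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect
    from sqlglot.parser import build_array_constructor
    from sqlglot.tokens import TokenType

    arr = build_array_constructor(
        exp.Array,
        [exp.Literal.number(1), exp.Literal.number(2)],
        TokenType.L_BRACKET,
        Dialect.get_or_raise(None),
    )

    # None here; True/False is only set for dialects that distinguish
    # bracketed from function-style array constructors
    print(arr.args.get("bracket_notation"))
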
132def build_convert_timezone( 133 args: t.List, default_source_tz: t.Optional[str] = None 134) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 135 if len(args) == 2: 136 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 137 return exp.ConvertTimezone( 138 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 139 ) 140 141 return exp.ConvertTimezone.from_arg_list(args)
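
Example of the two-argument form, where the optional default fills in the missing source timezone (the "UTC" default below is purely illustrative; dialects with an implicit source timezone pass their own):

    from sqlglot import exp
    from sqlglot.parser import build_convert_timezone

    node = build_convert_timezone(
        [exp.Literal.string("America/New_York"), exp.column("created_at")],
        default_source_tz="UTC",  # hypothetical default, for illustration
    )

    assert isinstance(node, exp.ConvertTimezone)
    print(node.args["source_tz"])  # 'UTC'
    print(node.args["target_tz"])  # 'America/New_York'
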
154class Parser(metaclass=_Parser): 155 """ 156 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 157 158 Args: 159 error_level: The desired error level. 160 Default: ErrorLevel.IMMEDIATE 161 error_message_context: The amount of context to capture from a query string when displaying 162 the error message (in number of characters). 163 Default: 100 164 max_errors: Maximum number of error messages to include in a raised ParseError. 165 This is only relevant if error_level is ErrorLevel.RAISE. 166 Default: 3 167 """ 168 169 FUNCTIONS: t.Dict[str, t.Callable] = { 170 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 171 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 172 "CONCAT": lambda args, dialect: exp.Concat( 173 expressions=args, 174 safe=not dialect.STRICT_STRING_CONCAT, 175 coalesce=dialect.CONCAT_COALESCE, 176 ), 177 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 178 expressions=args, 179 safe=not dialect.STRICT_STRING_CONCAT, 180 coalesce=dialect.CONCAT_COALESCE, 181 ), 182 "CONVERT_TIMEZONE": build_convert_timezone, 183 "DATE_TO_DATE_STR": lambda args: exp.Cast( 184 this=seq_get(args, 0), 185 to=exp.DataType(this=exp.DataType.Type.TEXT), 186 ), 187 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 188 start=seq_get(args, 0), 189 end=seq_get(args, 1), 190 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 191 ), 192 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 193 "HEX": build_hex, 194 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 195 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 196 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 197 "LIKE": build_like, 198 "LOG": build_logarithm, 199 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 200 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 201 "LOWER": build_lower, 202 "LPAD": lambda args: build_pad(args), 203 "LEFTPAD": lambda args: build_pad(args), 204 "MOD": build_mod, 205 "RPAD": lambda args: build_pad(args, is_left=False), 206 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 207 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 208 if len(args) != 2 209 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 210 "TIME_TO_TIME_STR": lambda args: exp.Cast( 211 this=seq_get(args, 0), 212 to=exp.DataType(this=exp.DataType.Type.TEXT), 213 ), 214 "TO_HEX": build_hex, 215 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 216 this=exp.Cast( 217 this=seq_get(args, 0), 218 to=exp.DataType(this=exp.DataType.Type.TEXT), 219 ), 220 start=exp.Literal.number(1), 221 length=exp.Literal.number(10), 222 ), 223 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 224 "UPPER": build_upper, 225 "VAR_MAP": build_var_map, 226 } 227 228 NO_PAREN_FUNCTIONS = { 229 TokenType.CURRENT_DATE: exp.CurrentDate, 230 TokenType.CURRENT_DATETIME: exp.CurrentDate, 231 TokenType.CURRENT_TIME: exp.CurrentTime, 232 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 233 TokenType.CURRENT_USER: exp.CurrentUser, 234 } 235 236 STRUCT_TYPE_TOKENS = { 237 TokenType.NESTED, 238 TokenType.OBJECT, 239 TokenType.STRUCT, 240 } 241 242 NESTED_TYPE_TOKENS = { 243 TokenType.ARRAY, 244 TokenType.LIST, 245 TokenType.LOWCARDINALITY, 246 TokenType.MAP, 247 TokenType.NULLABLE, 248 
*STRUCT_TYPE_TOKENS, 249 } 250 251 ENUM_TYPE_TOKENS = { 252 TokenType.ENUM, 253 TokenType.ENUM8, 254 TokenType.ENUM16, 255 } 256 257 AGGREGATE_TYPE_TOKENS = { 258 TokenType.AGGREGATEFUNCTION, 259 TokenType.SIMPLEAGGREGATEFUNCTION, 260 } 261 262 TYPE_TOKENS = { 263 TokenType.BIT, 264 TokenType.BOOLEAN, 265 TokenType.TINYINT, 266 TokenType.UTINYINT, 267 TokenType.SMALLINT, 268 TokenType.USMALLINT, 269 TokenType.INT, 270 TokenType.UINT, 271 TokenType.BIGINT, 272 TokenType.UBIGINT, 273 TokenType.INT128, 274 TokenType.UINT128, 275 TokenType.INT256, 276 TokenType.UINT256, 277 TokenType.MEDIUMINT, 278 TokenType.UMEDIUMINT, 279 TokenType.FIXEDSTRING, 280 TokenType.FLOAT, 281 TokenType.DOUBLE, 282 TokenType.CHAR, 283 TokenType.NCHAR, 284 TokenType.VARCHAR, 285 TokenType.NVARCHAR, 286 TokenType.BPCHAR, 287 TokenType.TEXT, 288 TokenType.MEDIUMTEXT, 289 TokenType.LONGTEXT, 290 TokenType.MEDIUMBLOB, 291 TokenType.LONGBLOB, 292 TokenType.BINARY, 293 TokenType.VARBINARY, 294 TokenType.JSON, 295 TokenType.JSONB, 296 TokenType.INTERVAL, 297 TokenType.TINYBLOB, 298 TokenType.TINYTEXT, 299 TokenType.TIME, 300 TokenType.TIMETZ, 301 TokenType.TIMESTAMP, 302 TokenType.TIMESTAMP_S, 303 TokenType.TIMESTAMP_MS, 304 TokenType.TIMESTAMP_NS, 305 TokenType.TIMESTAMPTZ, 306 TokenType.TIMESTAMPLTZ, 307 TokenType.TIMESTAMPNTZ, 308 TokenType.DATETIME, 309 TokenType.DATETIME64, 310 TokenType.DATE, 311 TokenType.DATE32, 312 TokenType.INT4RANGE, 313 TokenType.INT4MULTIRANGE, 314 TokenType.INT8RANGE, 315 TokenType.INT8MULTIRANGE, 316 TokenType.NUMRANGE, 317 TokenType.NUMMULTIRANGE, 318 TokenType.TSRANGE, 319 TokenType.TSMULTIRANGE, 320 TokenType.TSTZRANGE, 321 TokenType.TSTZMULTIRANGE, 322 TokenType.DATERANGE, 323 TokenType.DATEMULTIRANGE, 324 TokenType.DECIMAL, 325 TokenType.UDECIMAL, 326 TokenType.BIGDECIMAL, 327 TokenType.UUID, 328 TokenType.GEOGRAPHY, 329 TokenType.GEOMETRY, 330 TokenType.HLLSKETCH, 331 TokenType.HSTORE, 332 TokenType.PSEUDO_TYPE, 333 TokenType.SUPER, 334 TokenType.SERIAL, 335 TokenType.SMALLSERIAL, 336 TokenType.BIGSERIAL, 337 TokenType.XML, 338 TokenType.YEAR, 339 TokenType.UNIQUEIDENTIFIER, 340 TokenType.USERDEFINED, 341 TokenType.MONEY, 342 TokenType.SMALLMONEY, 343 TokenType.ROWVERSION, 344 TokenType.IMAGE, 345 TokenType.VARIANT, 346 TokenType.VECTOR, 347 TokenType.OBJECT, 348 TokenType.OBJECT_IDENTIFIER, 349 TokenType.INET, 350 TokenType.IPADDRESS, 351 TokenType.IPPREFIX, 352 TokenType.IPV4, 353 TokenType.IPV6, 354 TokenType.UNKNOWN, 355 TokenType.NULL, 356 TokenType.NAME, 357 TokenType.TDIGEST, 358 *ENUM_TYPE_TOKENS, 359 *NESTED_TYPE_TOKENS, 360 *AGGREGATE_TYPE_TOKENS, 361 } 362 363 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 364 TokenType.BIGINT: TokenType.UBIGINT, 365 TokenType.INT: TokenType.UINT, 366 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 367 TokenType.SMALLINT: TokenType.USMALLINT, 368 TokenType.TINYINT: TokenType.UTINYINT, 369 TokenType.DECIMAL: TokenType.UDECIMAL, 370 } 371 372 SUBQUERY_PREDICATES = { 373 TokenType.ANY: exp.Any, 374 TokenType.ALL: exp.All, 375 TokenType.EXISTS: exp.Exists, 376 TokenType.SOME: exp.Any, 377 } 378 379 RESERVED_TOKENS = { 380 *Tokenizer.SINGLE_TOKENS.values(), 381 TokenType.SELECT, 382 } - {TokenType.IDENTIFIER} 383 384 DB_CREATABLES = { 385 TokenType.DATABASE, 386 TokenType.DICTIONARY, 387 TokenType.MODEL, 388 TokenType.SCHEMA, 389 TokenType.SEQUENCE, 390 TokenType.STORAGE_INTEGRATION, 391 TokenType.TABLE, 392 TokenType.TAG, 393 TokenType.VIEW, 394 TokenType.WAREHOUSE, 395 TokenType.STREAMLIT, 396 } 397 398 CREATABLES = { 399 TokenType.COLUMN, 400 
TokenType.CONSTRAINT, 401 TokenType.FOREIGN_KEY, 402 TokenType.FUNCTION, 403 TokenType.INDEX, 404 TokenType.PROCEDURE, 405 *DB_CREATABLES, 406 } 407 408 ALTERABLES = { 409 TokenType.TABLE, 410 TokenType.VIEW, 411 } 412 413 # Tokens that can represent identifiers 414 ID_VAR_TOKENS = { 415 TokenType.ALL, 416 TokenType.VAR, 417 TokenType.ANTI, 418 TokenType.APPLY, 419 TokenType.ASC, 420 TokenType.ASOF, 421 TokenType.AUTO_INCREMENT, 422 TokenType.BEGIN, 423 TokenType.BPCHAR, 424 TokenType.CACHE, 425 TokenType.CASE, 426 TokenType.COLLATE, 427 TokenType.COMMAND, 428 TokenType.COMMENT, 429 TokenType.COMMIT, 430 TokenType.CONSTRAINT, 431 TokenType.COPY, 432 TokenType.CUBE, 433 TokenType.DEFAULT, 434 TokenType.DELETE, 435 TokenType.DESC, 436 TokenType.DESCRIBE, 437 TokenType.DICTIONARY, 438 TokenType.DIV, 439 TokenType.END, 440 TokenType.EXECUTE, 441 TokenType.ESCAPE, 442 TokenType.FALSE, 443 TokenType.FIRST, 444 TokenType.FILTER, 445 TokenType.FINAL, 446 TokenType.FORMAT, 447 TokenType.FULL, 448 TokenType.IDENTIFIER, 449 TokenType.IS, 450 TokenType.ISNULL, 451 TokenType.INTERVAL, 452 TokenType.KEEP, 453 TokenType.KILL, 454 TokenType.LEFT, 455 TokenType.LOAD, 456 TokenType.MERGE, 457 TokenType.NATURAL, 458 TokenType.NEXT, 459 TokenType.OFFSET, 460 TokenType.OPERATOR, 461 TokenType.ORDINALITY, 462 TokenType.OVERLAPS, 463 TokenType.OVERWRITE, 464 TokenType.PARTITION, 465 TokenType.PERCENT, 466 TokenType.PIVOT, 467 TokenType.PRAGMA, 468 TokenType.RANGE, 469 TokenType.RECURSIVE, 470 TokenType.REFERENCES, 471 TokenType.REFRESH, 472 TokenType.RENAME, 473 TokenType.REPLACE, 474 TokenType.RIGHT, 475 TokenType.ROLLUP, 476 TokenType.ROW, 477 TokenType.ROWS, 478 TokenType.SEMI, 479 TokenType.SET, 480 TokenType.SETTINGS, 481 TokenType.SHOW, 482 TokenType.TEMPORARY, 483 TokenType.TOP, 484 TokenType.TRUE, 485 TokenType.TRUNCATE, 486 TokenType.UNIQUE, 487 TokenType.UNNEST, 488 TokenType.UNPIVOT, 489 TokenType.UPDATE, 490 TokenType.USE, 491 TokenType.VOLATILE, 492 TokenType.WINDOW, 493 *CREATABLES, 494 *SUBQUERY_PREDICATES, 495 *TYPE_TOKENS, 496 *NO_PAREN_FUNCTIONS, 497 } 498 499 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 500 501 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 502 TokenType.ANTI, 503 TokenType.APPLY, 504 TokenType.ASOF, 505 TokenType.FULL, 506 TokenType.LEFT, 507 TokenType.LOCK, 508 TokenType.NATURAL, 509 TokenType.OFFSET, 510 TokenType.RIGHT, 511 TokenType.SEMI, 512 TokenType.WINDOW, 513 } 514 515 ALIAS_TOKENS = ID_VAR_TOKENS 516 517 ARRAY_CONSTRUCTORS = { 518 "ARRAY": exp.Array, 519 "LIST": exp.List, 520 } 521 522 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 523 524 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 525 526 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 527 528 FUNC_TOKENS = { 529 TokenType.COLLATE, 530 TokenType.COMMAND, 531 TokenType.CURRENT_DATE, 532 TokenType.CURRENT_DATETIME, 533 TokenType.CURRENT_TIMESTAMP, 534 TokenType.CURRENT_TIME, 535 TokenType.CURRENT_USER, 536 TokenType.FILTER, 537 TokenType.FIRST, 538 TokenType.FORMAT, 539 TokenType.GLOB, 540 TokenType.IDENTIFIER, 541 TokenType.INDEX, 542 TokenType.ISNULL, 543 TokenType.ILIKE, 544 TokenType.INSERT, 545 TokenType.LIKE, 546 TokenType.MERGE, 547 TokenType.OFFSET, 548 TokenType.PRIMARY_KEY, 549 TokenType.RANGE, 550 TokenType.REPLACE, 551 TokenType.RLIKE, 552 TokenType.ROW, 553 TokenType.UNNEST, 554 TokenType.VAR, 555 TokenType.LEFT, 556 TokenType.RIGHT, 557 TokenType.SEQUENCE, 558 TokenType.DATE, 559 TokenType.DATETIME, 560 TokenType.TABLE, 561 TokenType.TIMESTAMP, 562 TokenType.TIMESTAMPTZ, 563 
TokenType.TRUNCATE, 564 TokenType.WINDOW, 565 TokenType.XOR, 566 *TYPE_TOKENS, 567 *SUBQUERY_PREDICATES, 568 } 569 570 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 571 TokenType.AND: exp.And, 572 } 573 574 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 575 TokenType.COLON_EQ: exp.PropertyEQ, 576 } 577 578 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 579 TokenType.OR: exp.Or, 580 } 581 582 EQUALITY = { 583 TokenType.EQ: exp.EQ, 584 TokenType.NEQ: exp.NEQ, 585 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 586 } 587 588 COMPARISON = { 589 TokenType.GT: exp.GT, 590 TokenType.GTE: exp.GTE, 591 TokenType.LT: exp.LT, 592 TokenType.LTE: exp.LTE, 593 } 594 595 BITWISE = { 596 TokenType.AMP: exp.BitwiseAnd, 597 TokenType.CARET: exp.BitwiseXor, 598 TokenType.PIPE: exp.BitwiseOr, 599 } 600 601 TERM = { 602 TokenType.DASH: exp.Sub, 603 TokenType.PLUS: exp.Add, 604 TokenType.MOD: exp.Mod, 605 TokenType.COLLATE: exp.Collate, 606 } 607 608 FACTOR = { 609 TokenType.DIV: exp.IntDiv, 610 TokenType.LR_ARROW: exp.Distance, 611 TokenType.SLASH: exp.Div, 612 TokenType.STAR: exp.Mul, 613 } 614 615 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 616 617 TIMES = { 618 TokenType.TIME, 619 TokenType.TIMETZ, 620 } 621 622 TIMESTAMPS = { 623 TokenType.TIMESTAMP, 624 TokenType.TIMESTAMPTZ, 625 TokenType.TIMESTAMPLTZ, 626 *TIMES, 627 } 628 629 SET_OPERATIONS = { 630 TokenType.UNION, 631 TokenType.INTERSECT, 632 TokenType.EXCEPT, 633 } 634 635 JOIN_METHODS = { 636 TokenType.ASOF, 637 TokenType.NATURAL, 638 TokenType.POSITIONAL, 639 } 640 641 JOIN_SIDES = { 642 TokenType.LEFT, 643 TokenType.RIGHT, 644 TokenType.FULL, 645 } 646 647 JOIN_KINDS = { 648 TokenType.ANTI, 649 TokenType.CROSS, 650 TokenType.INNER, 651 TokenType.OUTER, 652 TokenType.SEMI, 653 TokenType.STRAIGHT_JOIN, 654 } 655 656 JOIN_HINTS: t.Set[str] = set() 657 658 LAMBDAS = { 659 TokenType.ARROW: lambda self, expressions: self.expression( 660 exp.Lambda, 661 this=self._replace_lambda( 662 self._parse_assignment(), 663 expressions, 664 ), 665 expressions=expressions, 666 ), 667 TokenType.FARROW: lambda self, expressions: self.expression( 668 exp.Kwarg, 669 this=exp.var(expressions[0].name), 670 expression=self._parse_assignment(), 671 ), 672 } 673 674 COLUMN_OPERATORS = { 675 TokenType.DOT: None, 676 TokenType.DCOLON: lambda self, this, to: self.expression( 677 exp.Cast if self.STRICT_CAST else exp.TryCast, 678 this=this, 679 to=to, 680 ), 681 TokenType.ARROW: lambda self, this, path: self.expression( 682 exp.JSONExtract, 683 this=this, 684 expression=self.dialect.to_json_path(path), 685 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 686 ), 687 TokenType.DARROW: lambda self, this, path: self.expression( 688 exp.JSONExtractScalar, 689 this=this, 690 expression=self.dialect.to_json_path(path), 691 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 692 ), 693 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 694 exp.JSONBExtract, 695 this=this, 696 expression=path, 697 ), 698 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 699 exp.JSONBExtractScalar, 700 this=this, 701 expression=path, 702 ), 703 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 704 exp.JSONBContains, 705 this=this, 706 expression=key, 707 ), 708 } 709 710 EXPRESSION_PARSERS = { 711 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 712 exp.Column: lambda self: self._parse_column(), 713 exp.Condition: lambda self: self._parse_assignment(), 714 exp.DataType: lambda self: 
self._parse_types(allow_identifiers=False, schema=True), 715 exp.Expression: lambda self: self._parse_expression(), 716 exp.From: lambda self: self._parse_from(joins=True), 717 exp.Group: lambda self: self._parse_group(), 718 exp.Having: lambda self: self._parse_having(), 719 exp.Identifier: lambda self: self._parse_id_var(), 720 exp.Join: lambda self: self._parse_join(), 721 exp.Lambda: lambda self: self._parse_lambda(), 722 exp.Lateral: lambda self: self._parse_lateral(), 723 exp.Limit: lambda self: self._parse_limit(), 724 exp.Offset: lambda self: self._parse_offset(), 725 exp.Order: lambda self: self._parse_order(), 726 exp.Ordered: lambda self: self._parse_ordered(), 727 exp.Properties: lambda self: self._parse_properties(), 728 exp.Qualify: lambda self: self._parse_qualify(), 729 exp.Returning: lambda self: self._parse_returning(), 730 exp.Select: lambda self: self._parse_select(), 731 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 732 exp.Table: lambda self: self._parse_table_parts(), 733 exp.TableAlias: lambda self: self._parse_table_alias(), 734 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 735 exp.Where: lambda self: self._parse_where(), 736 exp.Window: lambda self: self._parse_named_window(), 737 exp.With: lambda self: self._parse_with(), 738 "JOIN_TYPE": lambda self: self._parse_join_parts(), 739 } 740 741 STATEMENT_PARSERS = { 742 TokenType.ALTER: lambda self: self._parse_alter(), 743 TokenType.BEGIN: lambda self: self._parse_transaction(), 744 TokenType.CACHE: lambda self: self._parse_cache(), 745 TokenType.COMMENT: lambda self: self._parse_comment(), 746 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 747 TokenType.COPY: lambda self: self._parse_copy(), 748 TokenType.CREATE: lambda self: self._parse_create(), 749 TokenType.DELETE: lambda self: self._parse_delete(), 750 TokenType.DESC: lambda self: self._parse_describe(), 751 TokenType.DESCRIBE: lambda self: self._parse_describe(), 752 TokenType.DROP: lambda self: self._parse_drop(), 753 TokenType.INSERT: lambda self: self._parse_insert(), 754 TokenType.KILL: lambda self: self._parse_kill(), 755 TokenType.LOAD: lambda self: self._parse_load(), 756 TokenType.MERGE: lambda self: self._parse_merge(), 757 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 758 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 759 TokenType.REFRESH: lambda self: self._parse_refresh(), 760 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 761 TokenType.SET: lambda self: self._parse_set(), 762 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 763 TokenType.UNCACHE: lambda self: self._parse_uncache(), 764 TokenType.UPDATE: lambda self: self._parse_update(), 765 TokenType.USE: lambda self: self.expression( 766 exp.Use, 767 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 768 this=self._parse_table(schema=False), 769 ), 770 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 771 } 772 773 UNARY_PARSERS = { 774 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 775 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 776 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 777 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 778 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 779 TokenType.DPIPE_SLASH: lambda self: 
self.expression(exp.Cbrt, this=self._parse_unary()), 780 } 781 782 STRING_PARSERS = { 783 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 784 exp.RawString, this=token.text 785 ), 786 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 787 exp.National, this=token.text 788 ), 789 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 790 TokenType.STRING: lambda self, token: self.expression( 791 exp.Literal, this=token.text, is_string=True 792 ), 793 TokenType.UNICODE_STRING: lambda self, token: self.expression( 794 exp.UnicodeString, 795 this=token.text, 796 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 797 ), 798 } 799 800 NUMERIC_PARSERS = { 801 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 802 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 803 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 804 TokenType.NUMBER: lambda self, token: self.expression( 805 exp.Literal, this=token.text, is_string=False 806 ), 807 } 808 809 PRIMARY_PARSERS = { 810 **STRING_PARSERS, 811 **NUMERIC_PARSERS, 812 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 813 TokenType.NULL: lambda self, _: self.expression(exp.Null), 814 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 815 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 816 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 817 TokenType.STAR: lambda self, _: self.expression( 818 exp.Star, 819 **{ 820 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 821 "replace": self._parse_star_op("REPLACE"), 822 "rename": self._parse_star_op("RENAME"), 823 }, 824 ), 825 } 826 827 PLACEHOLDER_PARSERS = { 828 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 829 TokenType.PARAMETER: lambda self: self._parse_parameter(), 830 TokenType.COLON: lambda self: ( 831 self.expression(exp.Placeholder, this=self._prev.text) 832 if self._match_set(self.ID_VAR_TOKENS) 833 else None 834 ), 835 } 836 837 RANGE_PARSERS = { 838 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 839 TokenType.GLOB: binary_range_parser(exp.Glob), 840 TokenType.ILIKE: binary_range_parser(exp.ILike), 841 TokenType.IN: lambda self, this: self._parse_in(this), 842 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 843 TokenType.IS: lambda self, this: self._parse_is(this), 844 TokenType.LIKE: binary_range_parser(exp.Like), 845 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 846 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 847 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 848 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 849 } 850 851 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 852 "ALLOWED_VALUES": lambda self: self.expression( 853 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 854 ), 855 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 856 "AUTO": lambda self: self._parse_auto_property(), 857 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 858 "BACKUP": lambda self: self.expression( 859 exp.BackupProperty, this=self._parse_var(any_token=True) 860 ), 861 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 862 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 863 "CHARACTER 
SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 864 "CHECKSUM": lambda self: self._parse_checksum(), 865 "CLUSTER BY": lambda self: self._parse_cluster(), 866 "CLUSTERED": lambda self: self._parse_clustered_by(), 867 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 868 exp.CollateProperty, **kwargs 869 ), 870 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 871 "CONTAINS": lambda self: self._parse_contains_property(), 872 "COPY": lambda self: self._parse_copy_property(), 873 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 874 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 875 "DEFINER": lambda self: self._parse_definer(), 876 "DETERMINISTIC": lambda self: self.expression( 877 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 878 ), 879 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 880 "DISTKEY": lambda self: self._parse_distkey(), 881 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 882 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 883 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 884 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 885 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 886 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 887 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 888 "FREESPACE": lambda self: self._parse_freespace(), 889 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 890 "HEAP": lambda self: self.expression(exp.HeapProperty), 891 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 892 "IMMUTABLE": lambda self: self.expression( 893 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 894 ), 895 "INHERITS": lambda self: self.expression( 896 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 897 ), 898 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 899 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 900 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 901 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 902 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 903 "LIKE": lambda self: self._parse_create_like(), 904 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 905 "LOCK": lambda self: self._parse_locking(), 906 "LOCKING": lambda self: self._parse_locking(), 907 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 908 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 909 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 910 "MODIFIES": lambda self: self._parse_modifies_property(), 911 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 912 "NO": lambda self: self._parse_no_property(), 913 "ON": lambda self: self._parse_on_property(), 914 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 915 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 916 "PARTITION": lambda self: self._parse_partitioned_of(), 917 "PARTITION BY": lambda self: self._parse_partitioned_by(), 918 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 919 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 920 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 921 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 922 "READS": lambda self: self._parse_reads_property(), 923 "REMOTE": lambda self: self._parse_remote_with_connection(), 924 "RETURNS": lambda self: self._parse_returns(), 925 "STRICT": lambda self: self.expression(exp.StrictProperty), 926 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 927 "ROW": lambda self: self._parse_row(), 928 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 929 "SAMPLE": lambda self: self.expression( 930 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 931 ), 932 "SECURE": lambda self: self.expression(exp.SecureProperty), 933 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 934 "SETTINGS": lambda self: self.expression( 935 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 936 ), 937 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 938 "SORTKEY": lambda self: self._parse_sortkey(), 939 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 940 "STABLE": lambda self: self.expression( 941 exp.StabilityProperty, this=exp.Literal.string("STABLE") 942 ), 943 "STORED": lambda self: self._parse_stored(), 944 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 945 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 946 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 947 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 948 "TO": lambda self: self._parse_to_table(), 949 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 950 "TRANSFORM": lambda self: self.expression( 951 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 952 ), 953 "TTL": lambda self: self._parse_ttl(), 954 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 955 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 956 "VOLATILE": lambda self: self._parse_volatile_property(), 957 "WITH": lambda self: self._parse_with_property(), 958 } 959 960 CONSTRAINT_PARSERS = { 961 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 962 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 963 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 964 "CHARACTER SET": lambda self: self.expression( 965 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 966 ), 967 "CHECK": lambda self: self.expression( 968 exp.CheckColumnConstraint, 969 this=self._parse_wrapped(self._parse_assignment), 970 enforced=self._match_text_seq("ENFORCED"), 971 ), 972 "COLLATE": lambda self: self.expression( 973 exp.CollateColumnConstraint, 974 this=self._parse_identifier() or self._parse_column(), 975 ), 976 "COMMENT": lambda self: self.expression( 977 exp.CommentColumnConstraint, this=self._parse_string() 978 ), 979 "COMPRESS": lambda self: self._parse_compress(), 980 "CLUSTERED": lambda self: self.expression( 981 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 982 ), 983 "NONCLUSTERED": lambda self: self.expression( 984 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 985 ), 986 "DEFAULT": lambda self: self.expression( 987 exp.DefaultColumnConstraint, this=self._parse_bitwise() 988 ), 989 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 990 "EPHEMERAL": 
lambda self: self.expression( 991 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 992 ), 993 "EXCLUDE": lambda self: self.expression( 994 exp.ExcludeColumnConstraint, this=self._parse_index_params() 995 ), 996 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 997 "FORMAT": lambda self: self.expression( 998 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 999 ), 1000 "GENERATED": lambda self: self._parse_generated_as_identity(), 1001 "IDENTITY": lambda self: self._parse_auto_increment(), 1002 "INLINE": lambda self: self._parse_inline(), 1003 "LIKE": lambda self: self._parse_create_like(), 1004 "NOT": lambda self: self._parse_not_constraint(), 1005 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1006 "ON": lambda self: ( 1007 self._match(TokenType.UPDATE) 1008 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1009 ) 1010 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1011 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1012 "PERIOD": lambda self: self._parse_period_for_system_time(), 1013 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1014 "REFERENCES": lambda self: self._parse_references(match=False), 1015 "TITLE": lambda self: self.expression( 1016 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1017 ), 1018 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1019 "UNIQUE": lambda self: self._parse_unique(), 1020 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1021 "WITH": lambda self: self.expression( 1022 exp.Properties, expressions=self._parse_wrapped_properties() 1023 ), 1024 } 1025 1026 ALTER_PARSERS = { 1027 "ADD": lambda self: self._parse_alter_table_add(), 1028 "ALTER": lambda self: self._parse_alter_table_alter(), 1029 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1030 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1031 "DROP": lambda self: self._parse_alter_table_drop(), 1032 "RENAME": lambda self: self._parse_alter_table_rename(), 1033 "SET": lambda self: self._parse_alter_table_set(), 1034 "AS": lambda self: self._parse_select(), 1035 } 1036 1037 ALTER_ALTER_PARSERS = { 1038 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1039 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1040 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1041 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1042 } 1043 1044 SCHEMA_UNNAMED_CONSTRAINTS = { 1045 "CHECK", 1046 "EXCLUDE", 1047 "FOREIGN KEY", 1048 "LIKE", 1049 "PERIOD", 1050 "PRIMARY KEY", 1051 "UNIQUE", 1052 } 1053 1054 NO_PAREN_FUNCTION_PARSERS = { 1055 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1056 "CASE": lambda self: self._parse_case(), 1057 "CONNECT_BY_ROOT": lambda self: self.expression( 1058 exp.ConnectByRoot, this=self._parse_column() 1059 ), 1060 "IF": lambda self: self._parse_if(), 1061 "NEXT": lambda self: self._parse_next_value_for(), 1062 } 1063 1064 INVALID_FUNC_NAME_TOKENS = { 1065 TokenType.IDENTIFIER, 1066 TokenType.STRING, 1067 } 1068 1069 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1070 1071 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1072 1073 FUNCTION_PARSERS = { 1074 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1075 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1076 "DECODE": lambda self: self._parse_decode(), 1077 
"EXTRACT": lambda self: self._parse_extract(), 1078 "GAP_FILL": lambda self: self._parse_gap_fill(), 1079 "JSON_OBJECT": lambda self: self._parse_json_object(), 1080 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1081 "JSON_TABLE": lambda self: self._parse_json_table(), 1082 "MATCH": lambda self: self._parse_match_against(), 1083 "OPENJSON": lambda self: self._parse_open_json(), 1084 "POSITION": lambda self: self._parse_position(), 1085 "PREDICT": lambda self: self._parse_predict(), 1086 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1087 "STRING_AGG": lambda self: self._parse_string_agg(), 1088 "SUBSTRING": lambda self: self._parse_substring(), 1089 "TRIM": lambda self: self._parse_trim(), 1090 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1091 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1092 } 1093 1094 QUERY_MODIFIER_PARSERS = { 1095 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1096 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1097 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1098 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1099 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1100 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1101 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1102 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1103 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1104 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1105 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1106 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1107 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1108 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1109 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1110 TokenType.CLUSTER_BY: lambda self: ( 1111 "cluster", 1112 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1113 ), 1114 TokenType.DISTRIBUTE_BY: lambda self: ( 1115 "distribute", 1116 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1117 ), 1118 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1119 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1120 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1121 } 1122 1123 SET_PARSERS = { 1124 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1125 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1126 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1127 "TRANSACTION": lambda self: self._parse_set_transaction(), 1128 } 1129 1130 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1131 1132 TYPE_LITERAL_PARSERS = { 1133 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1134 } 1135 1136 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1137 1138 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1139 1140 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1141 1142 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1143 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1144 "ISOLATION": ( 1145 ("LEVEL", "REPEATABLE", "READ"), 1146 ("LEVEL", "READ", "COMMITTED"), 1147 
("LEVEL", "READ", "UNCOMITTED"), 1148 ("LEVEL", "SERIALIZABLE"), 1149 ), 1150 "READ": ("WRITE", "ONLY"), 1151 } 1152 1153 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1154 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1155 ) 1156 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1157 1158 CREATE_SEQUENCE: OPTIONS_TYPE = { 1159 "SCALE": ("EXTEND", "NOEXTEND"), 1160 "SHARD": ("EXTEND", "NOEXTEND"), 1161 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1162 **dict.fromkeys( 1163 ( 1164 "SESSION", 1165 "GLOBAL", 1166 "KEEP", 1167 "NOKEEP", 1168 "ORDER", 1169 "NOORDER", 1170 "NOCACHE", 1171 "CYCLE", 1172 "NOCYCLE", 1173 "NOMINVALUE", 1174 "NOMAXVALUE", 1175 "NOSCALE", 1176 "NOSHARD", 1177 ), 1178 tuple(), 1179 ), 1180 } 1181 1182 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1183 1184 USABLES: OPTIONS_TYPE = dict.fromkeys( 1185 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1186 ) 1187 1188 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1189 1190 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1191 "TYPE": ("EVOLUTION",), 1192 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1193 } 1194 1195 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1196 "NOT": ("ENFORCED",), 1197 "MATCH": ( 1198 "FULL", 1199 "PARTIAL", 1200 "SIMPLE", 1201 ), 1202 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1203 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1204 } 1205 1206 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1207 1208 CLONE_KEYWORDS = {"CLONE", "COPY"} 1209 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1210 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1211 1212 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1213 1214 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1215 1216 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1217 1218 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1219 1220 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1221 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1222 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1223 1224 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1225 1226 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1227 1228 ADD_CONSTRAINT_TOKENS = { 1229 TokenType.CONSTRAINT, 1230 TokenType.FOREIGN_KEY, 1231 TokenType.INDEX, 1232 TokenType.KEY, 1233 TokenType.PRIMARY_KEY, 1234 TokenType.UNIQUE, 1235 } 1236 1237 DISTINCT_TOKENS = {TokenType.DISTINCT} 1238 1239 NULL_TOKENS = {TokenType.NULL} 1240 1241 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1242 1243 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1244 1245 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1246 1247 STRICT_CAST = True 1248 1249 PREFIXED_PIVOT_COLUMNS = False 1250 IDENTIFY_PIVOT_STRINGS = False 1251 1252 LOG_DEFAULTS_TO_LN = False 1253 1254 # Whether ADD is present for each column added by ALTER TABLE 1255 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1256 1257 # Whether the table sample clause expects CSV syntax 1258 TABLESAMPLE_CSV = False 1259 1260 # The default method used for table sampling 1261 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1262 1263 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1264 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1265 1266 # Whether the TRIM function expects the characters to trim as its first argument 1267 TRIM_PATTERN_FIRST = False 1268 1269 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1270 STRING_ALIASES = False 1271 1272 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1273 MODIFIERS_ATTACHED_TO_SET_OP = True 1274 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1275 1276 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1277 NO_PAREN_IF_COMMANDS = True 1278 1279 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1280 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1281 1282 # Whether the `:` operator is used to extract a value from a VARIANT column 1283 COLON_IS_VARIANT_EXTRACT = False 1284 1285 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1286 # If this is True and '(' is not found, the keyword will be treated as an identifier 1287 VALUES_FOLLOWED_BY_PAREN = True 1288 1289 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1290 SUPPORTS_IMPLICIT_UNNEST = False 1291 1292 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1293 INTERVAL_SPANS = True 1294 1295 # Whether a PARTITION clause can follow a table reference 1296 SUPPORTS_PARTITION_SELECTION = False 1297 1298 __slots__ = ( 1299 "error_level", 1300 "error_message_context", 1301 "max_errors", 1302 "dialect", 1303 "sql", 1304 "errors", 1305 "_tokens", 1306 "_index", 1307 "_curr", 1308 "_next", 1309 "_prev", 1310 "_prev_comments", 1311 ) 1312 1313 # Autofilled 1314 SHOW_TRIE: t.Dict = {} 1315 SET_TRIE: t.Dict = {} 1316 1317 def __init__( 1318 self, 1319 error_level: t.Optional[ErrorLevel] = None, 1320 error_message_context: int = 100, 1321 max_errors: int = 3, 1322 dialect: DialectType = None, 1323 ): 1324 from sqlglot.dialects import Dialect 1325 1326 self.error_level = error_level or ErrorLevel.IMMEDIATE 1327 self.error_message_context = error_message_context 1328 self.max_errors = max_errors 1329 self.dialect = Dialect.get_or_raise(dialect) 1330 self.reset() 1331 1332 def reset(self): 1333 self.sql = "" 1334 self.errors = [] 1335 self._tokens = [] 1336 self._index = 0 1337 self._curr = None 1338 self._next = None 1339 self._prev = None 1340 self._prev_comments = None 1341 1342 def parse( 1343 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1344 ) -> t.List[t.Optional[exp.Expression]]: 1345 """ 1346 Parses a list of tokens and returns a list of syntax trees, one tree 1347 per parsed SQL statement. 1348 1349 Args: 1350 raw_tokens: The list of tokens. 1351 sql: The original SQL string, used to produce helpful debug messages. 1352 1353 Returns: 1354 The list of the produced syntax trees. 1355 """ 1356 return self._parse( 1357 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1358 ) 1359 1360 def parse_into( 1361 self, 1362 expression_types: exp.IntoType, 1363 raw_tokens: t.List[Token], 1364 sql: t.Optional[str] = None, 1365 ) -> t.List[t.Optional[exp.Expression]]: 1366 """ 1367 Parses a list of tokens into a given Expression type. If a collection of Expression 1368 types is given instead, this method will try to parse the token list into each one 1369 of them, stopping at the first for which the parsing succeeds. 1370 1371 Args: 1372 expression_types: The expression type(s) to try and parse the token list into. 
1373 raw_tokens: The list of tokens. 1374 sql: The original SQL string, used to produce helpful debug messages. 1375 1376 Returns: 1377 The target Expression. 1378 """ 1379 errors = [] 1380 for expression_type in ensure_list(expression_types): 1381 parser = self.EXPRESSION_PARSERS.get(expression_type) 1382 if not parser: 1383 raise TypeError(f"No parser registered for {expression_type}") 1384 1385 try: 1386 return self._parse(parser, raw_tokens, sql) 1387 except ParseError as e: 1388 e.errors[0]["into_expression"] = expression_type 1389 errors.append(e) 1390 1391 raise ParseError( 1392 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1393 errors=merge_errors(errors), 1394 ) from errors[-1] 1395 1396 def _parse( 1397 self, 1398 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1399 raw_tokens: t.List[Token], 1400 sql: t.Optional[str] = None, 1401 ) -> t.List[t.Optional[exp.Expression]]: 1402 self.reset() 1403 self.sql = sql or "" 1404 1405 total = len(raw_tokens) 1406 chunks: t.List[t.List[Token]] = [[]] 1407 1408 for i, token in enumerate(raw_tokens): 1409 if token.token_type == TokenType.SEMICOLON: 1410 if token.comments: 1411 chunks.append([token]) 1412 1413 if i < total - 1: 1414 chunks.append([]) 1415 else: 1416 chunks[-1].append(token) 1417 1418 expressions = [] 1419 1420 for tokens in chunks: 1421 self._index = -1 1422 self._tokens = tokens 1423 self._advance() 1424 1425 expressions.append(parse_method(self)) 1426 1427 if self._index < len(self._tokens): 1428 self.raise_error("Invalid expression / Unexpected token") 1429 1430 self.check_errors() 1431 1432 return expressions 1433 1434 def check_errors(self) -> None: 1435 """Logs or raises any found errors, depending on the chosen error level setting.""" 1436 if self.error_level == ErrorLevel.WARN: 1437 for error in self.errors: 1438 logger.error(str(error)) 1439 elif self.error_level == ErrorLevel.RAISE and self.errors: 1440 raise ParseError( 1441 concat_messages(self.errors, self.max_errors), 1442 errors=merge_errors(self.errors), 1443 ) 1444 1445 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1446 """ 1447 Appends an error in the list of recorded errors or raises it, depending on the chosen 1448 error level setting. 1449 """ 1450 token = token or self._curr or self._prev or Token.string("") 1451 start = token.start 1452 end = token.end + 1 1453 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1454 highlight = self.sql[start:end] 1455 end_context = self.sql[end : end + self.error_message_context] 1456 1457 error = ParseError.new( 1458 f"{message}. Line {token.line}, Col: {token.col}.\n" 1459 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1460 description=message, 1461 line=token.line, 1462 col=token.col, 1463 start_context=start_context, 1464 highlight=highlight, 1465 end_context=end_context, 1466 ) 1467 1468 if self.error_level == ErrorLevel.IMMEDIATE: 1469 raise error 1470 1471 self.errors.append(error) 1472 1473 def expression( 1474 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1475 ) -> E: 1476 """ 1477 Creates a new, validated Expression. 1478 1479 Args: 1480 exp_class: The expression class to instantiate. 1481 comments: An optional list of comments to attach to the expression. 1482 kwargs: The arguments to set for the expression along with their respective values. 1483 1484 Returns: 1485 The target expression. 
1486 """ 1487 instance = exp_class(**kwargs) 1488 instance.add_comments(comments) if comments else self._add_comments(instance) 1489 return self.validate_expression(instance) 1490 1491 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1492 if expression and self._prev_comments: 1493 expression.add_comments(self._prev_comments) 1494 self._prev_comments = None 1495 1496 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1497 """ 1498 Validates an Expression, making sure that all its mandatory arguments are set. 1499 1500 Args: 1501 expression: The expression to validate. 1502 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1503 1504 Returns: 1505 The validated expression. 1506 """ 1507 if self.error_level != ErrorLevel.IGNORE: 1508 for error_message in expression.error_messages(args): 1509 self.raise_error(error_message) 1510 1511 return expression 1512 1513 def _find_sql(self, start: Token, end: Token) -> str: 1514 return self.sql[start.start : end.end + 1] 1515 1516 def _is_connected(self) -> bool: 1517 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1518 1519 def _advance(self, times: int = 1) -> None: 1520 self._index += times 1521 self._curr = seq_get(self._tokens, self._index) 1522 self._next = seq_get(self._tokens, self._index + 1) 1523 1524 if self._index > 0: 1525 self._prev = self._tokens[self._index - 1] 1526 self._prev_comments = self._prev.comments 1527 else: 1528 self._prev = None 1529 self._prev_comments = None 1530 1531 def _retreat(self, index: int) -> None: 1532 if index != self._index: 1533 self._advance(index - self._index) 1534 1535 def _warn_unsupported(self) -> None: 1536 if len(self._tokens) <= 1: 1537 return 1538 1539 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1540 # interested in emitting a warning for the one being currently processed. 1541 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1542 1543 logger.warning( 1544 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1545 ) 1546 1547 def _parse_command(self) -> exp.Command: 1548 self._warn_unsupported() 1549 return self.expression( 1550 exp.Command, 1551 comments=self._prev_comments, 1552 this=self._prev.text.upper(), 1553 expression=self._parse_string(), 1554 ) 1555 1556 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1557 """ 1558 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1560 solve this by setting & resetting the parser state accordingly 1561 """ 1562 index = self._index 1563 error_level = self.error_level 1564 1565 self.error_level = ErrorLevel.IMMEDIATE 1566 try: 1567 this = parse_method() 1568 except ParseError: 1569 this = None 1570 finally: 1571 if not this or retreat: 1572 self._retreat(index) 1573 self.error_level = error_level 1574 1575 return this 1576 1577 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1578 start = self._prev 1579 exists = self._parse_exists() if allow_exists else None 1580 1581 self._match(TokenType.ON) 1582 1583 materialized = self._match_text_seq("MATERIALIZED") 1584 kind = self._match_set(self.CREATABLES) and self._prev 1585 if not kind: 1586 return self._parse_as_command(start) 1587 1588 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1589 this = self._parse_user_defined_function(kind=kind.token_type) 1590 elif kind.token_type == TokenType.TABLE: 1591 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1592 elif kind.token_type == TokenType.COLUMN: 1593 this = self._parse_column() 1594 else: 1595 this = self._parse_id_var() 1596 1597 self._match(TokenType.IS) 1598 1599 return self.expression( 1600 exp.Comment, 1601 this=this, 1602 kind=kind.text, 1603 expression=self._parse_string(), 1604 exists=exists, 1605 materialized=materialized, 1606 ) 1607 1608 def _parse_to_table( 1609 self, 1610 ) -> exp.ToTableProperty: 1611 table = self._parse_table_parts(schema=True) 1612 return self.expression(exp.ToTableProperty, this=table) 1613 1614 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1615 def _parse_ttl(self) -> exp.Expression: 1616 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1617 this = self._parse_bitwise() 1618 1619 if self._match_text_seq("DELETE"): 1620 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1621 if self._match_text_seq("RECOMPRESS"): 1622 return self.expression( 1623 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1624 ) 1625 if self._match_text_seq("TO", "DISK"): 1626 return self.expression( 1627 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1628 ) 1629 if self._match_text_seq("TO", "VOLUME"): 1630 return self.expression( 1631 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1632 ) 1633 1634 return this 1635 1636 expressions = self._parse_csv(_parse_ttl_action) 1637 where = self._parse_where() 1638 group = self._parse_group() 1639 1640 aggregates = None 1641 if group and self._match(TokenType.SET): 1642 aggregates = self._parse_csv(self._parse_set_item) 1643 1644 return self.expression( 1645 exp.MergeTreeTTL, 1646 expressions=expressions, 1647 where=where, 1648 group=group, 1649 aggregates=aggregates, 1650 ) 1651 1652 def _parse_statement(self) -> t.Optional[exp.Expression]: 1653 if self._curr is None: 1654 return None 1655 1656 if self._match_set(self.STATEMENT_PARSERS): 1657 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1658 1659 if self._match_set(self.dialect.tokenizer.COMMANDS): 1660 return self._parse_command() 1661 1662 expression = self._parse_expression() 1663 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1664 return self._parse_query_modifiers(expression) 1665 1666 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1667 start =
self._prev 1668 temporary = self._match(TokenType.TEMPORARY) 1669 materialized = self._match_text_seq("MATERIALIZED") 1670 1671 kind = self._match_set(self.CREATABLES) and self._prev.text 1672 if not kind: 1673 return self._parse_as_command(start) 1674 1675 if_exists = exists or self._parse_exists() 1676 table = self._parse_table_parts( 1677 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1678 ) 1679 1680 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1681 1682 if self._match(TokenType.L_PAREN, advance=False): 1683 expressions = self._parse_wrapped_csv(self._parse_types) 1684 else: 1685 expressions = None 1686 1687 return self.expression( 1688 exp.Drop, 1689 comments=start.comments, 1690 exists=if_exists, 1691 this=table, 1692 expressions=expressions, 1693 kind=kind.upper(), 1694 temporary=temporary, 1695 materialized=materialized, 1696 cascade=self._match_text_seq("CASCADE"), 1697 constraints=self._match_text_seq("CONSTRAINTS"), 1698 purge=self._match_text_seq("PURGE"), 1699 cluster=cluster, 1700 ) 1701 1702 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1703 return ( 1704 self._match_text_seq("IF") 1705 and (not not_ or self._match(TokenType.NOT)) 1706 and self._match(TokenType.EXISTS) 1707 ) 1708 1709 def _parse_create(self) -> exp.Create | exp.Command: 1710 # Note: this can't be None because we've matched a statement parser 1711 start = self._prev 1712 comments = self._prev_comments 1713 1714 replace = ( 1715 start.token_type == TokenType.REPLACE 1716 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1717 or self._match_pair(TokenType.OR, TokenType.ALTER) 1718 ) 1719 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1720 1721 unique = self._match(TokenType.UNIQUE) 1722 1723 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1724 clustered = True 1725 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1726 "COLUMNSTORE" 1727 ): 1728 clustered = False 1729 else: 1730 clustered = None 1731 1732 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1733 self._advance() 1734 1735 properties = None 1736 create_token = self._match_set(self.CREATABLES) and self._prev 1737 1738 if not create_token: 1739 # exp.Properties.Location.POST_CREATE 1740 properties = self._parse_properties() 1741 create_token = self._match_set(self.CREATABLES) and self._prev 1742 1743 if not properties or not create_token: 1744 return self._parse_as_command(start) 1745 1746 concurrently = self._match_text_seq("CONCURRENTLY") 1747 exists = self._parse_exists(not_=True) 1748 this = None 1749 expression: t.Optional[exp.Expression] = None 1750 indexes = None 1751 no_schema_binding = None 1752 begin = None 1753 end = None 1754 clone = None 1755 1756 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1757 nonlocal properties 1758 if properties and temp_props: 1759 properties.expressions.extend(temp_props.expressions) 1760 elif temp_props: 1761 properties = temp_props 1762 1763 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1764 this = self._parse_user_defined_function(kind=create_token.token_type) 1765 1766 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1767 extend_props(self._parse_properties()) 1768 1769 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1770 extend_props(self._parse_properties()) 1771 1772 if not expression: 1773 if self._match(TokenType.COMMAND): 1774 expression = 
self._parse_as_command(self._prev) 1775 else: 1776 begin = self._match(TokenType.BEGIN) 1777 return_ = self._match_text_seq("RETURN") 1778 1779 if self._match(TokenType.STRING, advance=False): 1780 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1781 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1782 expression = self._parse_string() 1783 extend_props(self._parse_properties()) 1784 else: 1785 expression = self._parse_statement() 1786 1787 end = self._match_text_seq("END") 1788 1789 if return_: 1790 expression = self.expression(exp.Return, this=expression) 1791 elif create_token.token_type == TokenType.INDEX: 1792 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1793 if not self._match(TokenType.ON): 1794 index = self._parse_id_var() 1795 anonymous = False 1796 else: 1797 index = None 1798 anonymous = True 1799 1800 this = self._parse_index(index=index, anonymous=anonymous) 1801 elif create_token.token_type in self.DB_CREATABLES: 1802 table_parts = self._parse_table_parts( 1803 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1804 ) 1805 1806 # exp.Properties.Location.POST_NAME 1807 self._match(TokenType.COMMA) 1808 extend_props(self._parse_properties(before=True)) 1809 1810 this = self._parse_schema(this=table_parts) 1811 1812 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1813 extend_props(self._parse_properties()) 1814 1815 self._match(TokenType.ALIAS) 1816 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1817 # exp.Properties.Location.POST_ALIAS 1818 extend_props(self._parse_properties()) 1819 1820 if create_token.token_type == TokenType.SEQUENCE: 1821 expression = self._parse_types() 1822 extend_props(self._parse_properties()) 1823 else: 1824 expression = self._parse_ddl_select() 1825 1826 if create_token.token_type == TokenType.TABLE: 1827 # exp.Properties.Location.POST_EXPRESSION 1828 extend_props(self._parse_properties()) 1829 1830 indexes = [] 1831 while True: 1832 index = self._parse_index() 1833 1834 # exp.Properties.Location.POST_INDEX 1835 extend_props(self._parse_properties()) 1836 if not index: 1837 break 1838 else: 1839 self._match(TokenType.COMMA) 1840 indexes.append(index) 1841 elif create_token.token_type == TokenType.VIEW: 1842 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1843 no_schema_binding = True 1844 1845 shallow = self._match_text_seq("SHALLOW") 1846 1847 if self._match_texts(self.CLONE_KEYWORDS): 1848 copy = self._prev.text.lower() == "copy" 1849 clone = self.expression( 1850 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1851 ) 1852 1853 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1854 return self._parse_as_command(start) 1855 1856 return self.expression( 1857 exp.Create, 1858 comments=comments, 1859 this=this, 1860 kind=create_token.text.upper(), 1861 replace=replace, 1862 refresh=refresh, 1863 unique=unique, 1864 expression=expression, 1865 exists=exists, 1866 properties=properties, 1867 indexes=indexes, 1868 no_schema_binding=no_schema_binding, 1869 begin=begin, 1870 end=end, 1871 clone=clone, 1872 concurrently=concurrently, 1873 clustered=clustered, 1874 ) 1875 1876 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1877 seq = exp.SequenceProperties() 1878 1879 options = [] 1880 index = self._index 1881 1882 while self._curr: 1883 self._match(TokenType.COMMA) 1884 if
self._match_text_seq("INCREMENT"): 1885 self._match_text_seq("BY") 1886 self._match_text_seq("=") 1887 seq.set("increment", self._parse_term()) 1888 elif self._match_text_seq("MINVALUE"): 1889 seq.set("minvalue", self._parse_term()) 1890 elif self._match_text_seq("MAXVALUE"): 1891 seq.set("maxvalue", self._parse_term()) 1892 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1893 self._match_text_seq("=") 1894 seq.set("start", self._parse_term()) 1895 elif self._match_text_seq("CACHE"): 1896 # T-SQL allows empty CACHE which is initialized dynamically 1897 seq.set("cache", self._parse_number() or True) 1898 elif self._match_text_seq("OWNED", "BY"): 1899 # "OWNED BY NONE" is the default 1900 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1901 else: 1902 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1903 if opt: 1904 options.append(opt) 1905 else: 1906 break 1907 1908 seq.set("options", options if options else None) 1909 return None if self._index == index else seq 1910 1911 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1912 # only used for teradata currently 1913 self._match(TokenType.COMMA) 1914 1915 kwargs = { 1916 "no": self._match_text_seq("NO"), 1917 "dual": self._match_text_seq("DUAL"), 1918 "before": self._match_text_seq("BEFORE"), 1919 "default": self._match_text_seq("DEFAULT"), 1920 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1921 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1922 "after": self._match_text_seq("AFTER"), 1923 "minimum": self._match_texts(("MIN", "MINIMUM")), 1924 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1925 } 1926 1927 if self._match_texts(self.PROPERTY_PARSERS): 1928 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1929 try: 1930 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1931 except TypeError: 1932 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1933 1934 return None 1935 1936 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1937 return self._parse_wrapped_csv(self._parse_property) 1938 1939 def _parse_property(self) -> t.Optional[exp.Expression]: 1940 if self._match_texts(self.PROPERTY_PARSERS): 1941 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1942 1943 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1944 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1945 1946 if self._match_text_seq("COMPOUND", "SORTKEY"): 1947 return self._parse_sortkey(compound=True) 1948 1949 if self._match_text_seq("SQL", "SECURITY"): 1950 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1951 1952 index = self._index 1953 key = self._parse_column() 1954 1955 if not self._match(TokenType.EQ): 1956 self._retreat(index) 1957 return self._parse_sequence_properties() 1958 1959 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1960 if isinstance(key, exp.Column): 1961 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1962 1963 value = self._parse_bitwise() or self._parse_var(any_token=True) 1964 1965 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1966 if isinstance(value, exp.Column): 1967 value = exp.var(value.name) 1968 1969 return self.expression(exp.Property, this=key, value=value) 1970 1971 def _parse_stored(self) -> exp.FileFormatProperty: 1972 self._match(TokenType.ALIAS) 1973 1974 
input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1975 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1976 1977 return self.expression( 1978 exp.FileFormatProperty, 1979 this=( 1980 self.expression( 1981 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1982 ) 1983 if input_format or output_format 1984 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1985 ), 1986 ) 1987 1988 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1989 field = self._parse_field() 1990 if isinstance(field, exp.Identifier) and not field.quoted: 1991 field = exp.var(field) 1992 1993 return field 1994 1995 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1996 self._match(TokenType.EQ) 1997 self._match(TokenType.ALIAS) 1998 1999 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2000 2001 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2002 properties = [] 2003 while True: 2004 if before: 2005 prop = self._parse_property_before() 2006 else: 2007 prop = self._parse_property() 2008 if not prop: 2009 break 2010 for p in ensure_list(prop): 2011 properties.append(p) 2012 2013 if properties: 2014 return self.expression(exp.Properties, expressions=properties) 2015 2016 return None 2017 2018 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2019 return self.expression( 2020 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2021 ) 2022 2023 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2024 if self._index >= 2: 2025 pre_volatile_token = self._tokens[self._index - 2] 2026 else: 2027 pre_volatile_token = None 2028 2029 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2030 return exp.VolatileProperty() 2031 2032 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2033 2034 def _parse_retention_period(self) -> exp.Var: 2035 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2036 number = self._parse_number() 2037 number_str = f"{number} " if number else "" 2038 unit = self._parse_var(any_token=True) 2039 return exp.var(f"{number_str}{unit}") 2040 2041 def _parse_system_versioning_property( 2042 self, with_: bool = False 2043 ) -> exp.WithSystemVersioningProperty: 2044 self._match(TokenType.EQ) 2045 prop = self.expression( 2046 exp.WithSystemVersioningProperty, 2047 **{ # type: ignore 2048 "on": True, 2049 "with": with_, 2050 }, 2051 ) 2052 2053 if self._match_text_seq("OFF"): 2054 prop.set("on", False) 2055 return prop 2056 2057 self._match(TokenType.ON) 2058 if self._match(TokenType.L_PAREN): 2059 while self._curr and not self._match(TokenType.R_PAREN): 2060 if self._match_text_seq("HISTORY_TABLE", "="): 2061 prop.set("this", self._parse_table_parts()) 2062 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2063 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2064 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2065 prop.set("retention_period", self._parse_retention_period()) 2066 2067 self._match(TokenType.COMMA) 2068 2069 return prop 2070 2071 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2072 self._match(TokenType.EQ) 2073 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2074 prop = 
self.expression(exp.DataDeletionProperty, on=on) 2075 2076 if self._match(TokenType.L_PAREN): 2077 while self._curr and not self._match(TokenType.R_PAREN): 2078 if self._match_text_seq("FILTER_COLUMN", "="): 2079 prop.set("filter_column", self._parse_column()) 2080 elif self._match_text_seq("RETENTION_PERIOD", "="): 2081 prop.set("retention_period", self._parse_retention_period()) 2082 2083 self._match(TokenType.COMMA) 2084 2085 return prop 2086 2087 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2088 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2089 prop = self._parse_system_versioning_property(with_=True) 2090 self._match_r_paren() 2091 return prop 2092 2093 if self._match(TokenType.L_PAREN, advance=False): 2094 return self._parse_wrapped_properties() 2095 2096 if self._match_text_seq("JOURNAL"): 2097 return self._parse_withjournaltable() 2098 2099 if self._match_texts(self.VIEW_ATTRIBUTES): 2100 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2101 2102 if self._match_text_seq("DATA"): 2103 return self._parse_withdata(no=False) 2104 elif self._match_text_seq("NO", "DATA"): 2105 return self._parse_withdata(no=True) 2106 2107 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2108 return self._parse_serde_properties(with_=True) 2109 2110 if self._match(TokenType.SCHEMA): 2111 return self.expression( 2112 exp.WithSchemaBindingProperty, 2113 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2114 ) 2115 2116 if not self._next: 2117 return None 2118 2119 return self._parse_withisolatedloading() 2120 2121 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2122 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2123 self._match(TokenType.EQ) 2124 2125 user = self._parse_id_var() 2126 self._match(TokenType.PARAMETER) 2127 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2128 2129 if not user or not host: 2130 return None 2131 2132 return exp.DefinerProperty(this=f"{user}@{host}") 2133 2134 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2135 self._match(TokenType.TABLE) 2136 self._match(TokenType.EQ) 2137 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2138 2139 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2140 return self.expression(exp.LogProperty, no=no) 2141 2142 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2143 return self.expression(exp.JournalProperty, **kwargs) 2144 2145 def _parse_checksum(self) -> exp.ChecksumProperty: 2146 self._match(TokenType.EQ) 2147 2148 on = None 2149 if self._match(TokenType.ON): 2150 on = True 2151 elif self._match_text_seq("OFF"): 2152 on = False 2153 2154 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2155 2156 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2157 return self.expression( 2158 exp.Cluster, 2159 expressions=( 2160 self._parse_wrapped_csv(self._parse_ordered) 2161 if wrapped 2162 else self._parse_csv(self._parse_ordered) 2163 ), 2164 ) 2165 2166 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2167 self._match_text_seq("BY") 2168 2169 self._match_l_paren() 2170 expressions = self._parse_csv(self._parse_column) 2171 self._match_r_paren() 2172 2173 if self._match_text_seq("SORTED", "BY"): 2174 self._match_l_paren() 2175 sorted_by = self._parse_csv(self._parse_ordered) 2176 self._match_r_paren() 2177 else: 2178 sorted_by = None 2179 2180 
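#
# [Illustrative aside -- not part of the parser source] _parse_definer above folds
# MySQL's DEFINER clause into a single `user@host` string. A rough round trip
# through the public API (identifiers are hypothetical; output shape indicative):
#
#     import sqlglot
#
#     ast = sqlglot.parse_one(
#         "CREATE DEFINER=`admin`@`localhost` VIEW v AS SELECT 1", read="mysql"
#     )
#     # The view's properties should include DefinerProperty(this='admin@localhost').
#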
self._match(TokenType.INTO) 2181 buckets = self._parse_number() 2182 self._match_text_seq("BUCKETS") 2183 2184 return self.expression( 2185 exp.ClusteredByProperty, 2186 expressions=expressions, 2187 sorted_by=sorted_by, 2188 buckets=buckets, 2189 ) 2190 2191 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2192 if not self._match_text_seq("GRANTS"): 2193 self._retreat(self._index - 1) 2194 return None 2195 2196 return self.expression(exp.CopyGrantsProperty) 2197 2198 def _parse_freespace(self) -> exp.FreespaceProperty: 2199 self._match(TokenType.EQ) 2200 return self.expression( 2201 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2202 ) 2203 2204 def _parse_mergeblockratio( 2205 self, no: bool = False, default: bool = False 2206 ) -> exp.MergeBlockRatioProperty: 2207 if self._match(TokenType.EQ): 2208 return self.expression( 2209 exp.MergeBlockRatioProperty, 2210 this=self._parse_number(), 2211 percent=self._match(TokenType.PERCENT), 2212 ) 2213 2214 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2215 2216 def _parse_datablocksize( 2217 self, 2218 default: t.Optional[bool] = None, 2219 minimum: t.Optional[bool] = None, 2220 maximum: t.Optional[bool] = None, 2221 ) -> exp.DataBlocksizeProperty: 2222 self._match(TokenType.EQ) 2223 size = self._parse_number() 2224 2225 units = None 2226 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2227 units = self._prev.text 2228 2229 return self.expression( 2230 exp.DataBlocksizeProperty, 2231 size=size, 2232 units=units, 2233 default=default, 2234 minimum=minimum, 2235 maximum=maximum, 2236 ) 2237 2238 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2239 self._match(TokenType.EQ) 2240 always = self._match_text_seq("ALWAYS") 2241 manual = self._match_text_seq("MANUAL") 2242 never = self._match_text_seq("NEVER") 2243 default = self._match_text_seq("DEFAULT") 2244 2245 autotemp = None 2246 if self._match_text_seq("AUTOTEMP"): 2247 autotemp = self._parse_schema() 2248 2249 return self.expression( 2250 exp.BlockCompressionProperty, 2251 always=always, 2252 manual=manual, 2253 never=never, 2254 default=default, 2255 autotemp=autotemp, 2256 ) 2257 2258 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2259 index = self._index 2260 no = self._match_text_seq("NO") 2261 concurrent = self._match_text_seq("CONCURRENT") 2262 2263 if not self._match_text_seq("ISOLATED", "LOADING"): 2264 self._retreat(index) 2265 return None 2266 2267 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2268 return self.expression( 2269 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2270 ) 2271 2272 def _parse_locking(self) -> exp.LockingProperty: 2273 if self._match(TokenType.TABLE): 2274 kind = "TABLE" 2275 elif self._match(TokenType.VIEW): 2276 kind = "VIEW" 2277 elif self._match(TokenType.ROW): 2278 kind = "ROW" 2279 elif self._match_text_seq("DATABASE"): 2280 kind = "DATABASE" 2281 else: 2282 kind = None 2283 2284 if kind in ("DATABASE", "TABLE", "VIEW"): 2285 this = self._parse_table_parts() 2286 else: 2287 this = None 2288 2289 if self._match(TokenType.FOR): 2290 for_or_in = "FOR" 2291 elif self._match(TokenType.IN): 2292 for_or_in = "IN" 2293 else: 2294 for_or_in = None 2295 2296 if self._match_text_seq("ACCESS"): 2297 lock_type = "ACCESS" 2298 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2299 lock_type = "EXCLUSIVE" 2300 elif self._match_text_seq("SHARE"): 2301 
lock_type = "SHARE" 2302 elif self._match_text_seq("READ"): 2303 lock_type = "READ" 2304 elif self._match_text_seq("WRITE"): 2305 lock_type = "WRITE" 2306 elif self._match_text_seq("CHECKSUM"): 2307 lock_type = "CHECKSUM" 2308 else: 2309 lock_type = None 2310 2311 override = self._match_text_seq("OVERRIDE") 2312 2313 return self.expression( 2314 exp.LockingProperty, 2315 this=this, 2316 kind=kind, 2317 for_or_in=for_or_in, 2318 lock_type=lock_type, 2319 override=override, 2320 ) 2321 2322 def _parse_partition_by(self) -> t.List[exp.Expression]: 2323 if self._match(TokenType.PARTITION_BY): 2324 return self._parse_csv(self._parse_assignment) 2325 return [] 2326 2327 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2328 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2329 if self._match_text_seq("MINVALUE"): 2330 return exp.var("MINVALUE") 2331 if self._match_text_seq("MAXVALUE"): 2332 return exp.var("MAXVALUE") 2333 return self._parse_bitwise() 2334 2335 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2336 expression = None 2337 from_expressions = None 2338 to_expressions = None 2339 2340 if self._match(TokenType.IN): 2341 this = self._parse_wrapped_csv(self._parse_bitwise) 2342 elif self._match(TokenType.FROM): 2343 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2344 self._match_text_seq("TO") 2345 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2346 elif self._match_text_seq("WITH", "(", "MODULUS"): 2347 this = self._parse_number() 2348 self._match_text_seq(",", "REMAINDER") 2349 expression = self._parse_number() 2350 self._match_r_paren() 2351 else: 2352 self.raise_error("Failed to parse partition bound spec.") 2353 2354 return self.expression( 2355 exp.PartitionBoundSpec, 2356 this=this, 2357 expression=expression, 2358 from_expressions=from_expressions, 2359 to_expressions=to_expressions, 2360 ) 2361 2362 # https://www.postgresql.org/docs/current/sql-createtable.html 2363 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2364 if not self._match_text_seq("OF"): 2365 self._retreat(self._index - 1) 2366 return None 2367 2368 this = self._parse_table(schema=True) 2369 2370 if self._match(TokenType.DEFAULT): 2371 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2372 elif self._match_text_seq("FOR", "VALUES"): 2373 expression = self._parse_partition_bound_spec() 2374 else: 2375 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2376 2377 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2378 2379 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2380 self._match(TokenType.EQ) 2381 return self.expression( 2382 exp.PartitionedByProperty, 2383 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2384 ) 2385 2386 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2387 if self._match_text_seq("AND", "STATISTICS"): 2388 statistics = True 2389 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2390 statistics = False 2391 else: 2392 statistics = None 2393 2394 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2395 2396 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2397 if self._match_text_seq("SQL"): 2398 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2399 return None 2400 2401 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2402 if self._match_text_seq("SQL", 
"DATA"): 2403 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2404 return None 2405 2406 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2407 if self._match_text_seq("PRIMARY", "INDEX"): 2408 return exp.NoPrimaryIndexProperty() 2409 if self._match_text_seq("SQL"): 2410 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2411 return None 2412 2413 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2414 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2415 return exp.OnCommitProperty() 2416 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2417 return exp.OnCommitProperty(delete=True) 2418 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2419 2420 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2421 if self._match_text_seq("SQL", "DATA"): 2422 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2423 return None 2424 2425 def _parse_distkey(self) -> exp.DistKeyProperty: 2426 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2427 2428 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2429 table = self._parse_table(schema=True) 2430 2431 options = [] 2432 while self._match_texts(("INCLUDING", "EXCLUDING")): 2433 this = self._prev.text.upper() 2434 2435 id_var = self._parse_id_var() 2436 if not id_var: 2437 return None 2438 2439 options.append( 2440 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2441 ) 2442 2443 return self.expression(exp.LikeProperty, this=table, expressions=options) 2444 2445 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2446 return self.expression( 2447 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2448 ) 2449 2450 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2451 self._match(TokenType.EQ) 2452 return self.expression( 2453 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2454 ) 2455 2456 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2457 self._match_text_seq("WITH", "CONNECTION") 2458 return self.expression( 2459 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2460 ) 2461 2462 def _parse_returns(self) -> exp.ReturnsProperty: 2463 value: t.Optional[exp.Expression] 2464 null = None 2465 is_table = self._match(TokenType.TABLE) 2466 2467 if is_table: 2468 if self._match(TokenType.LT): 2469 value = self.expression( 2470 exp.Schema, 2471 this="TABLE", 2472 expressions=self._parse_csv(self._parse_struct_types), 2473 ) 2474 if not self._match(TokenType.GT): 2475 self.raise_error("Expecting >") 2476 else: 2477 value = self._parse_schema(exp.var("TABLE")) 2478 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2479 null = True 2480 value = None 2481 else: 2482 value = self._parse_types() 2483 2484 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2485 2486 def _parse_describe(self) -> exp.Describe: 2487 kind = self._match_set(self.CREATABLES) and self._prev.text 2488 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2489 if self._match(TokenType.DOT): 2490 style = None 2491 self._retreat(self._index - 2) 2492 this = self._parse_table(schema=True) 2493 properties = self._parse_properties() 2494 expressions = properties.expressions if properties else None 2495 return self.expression( 2496 
exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2497 ) 2498 2499 def _parse_insert(self) -> exp.Insert: 2500 comments = ensure_list(self._prev_comments) 2501 hint = self._parse_hint() 2502 overwrite = self._match(TokenType.OVERWRITE) 2503 ignore = self._match(TokenType.IGNORE) 2504 local = self._match_text_seq("LOCAL") 2505 alternative = None 2506 is_function = None 2507 2508 if self._match_text_seq("DIRECTORY"): 2509 this: t.Optional[exp.Expression] = self.expression( 2510 exp.Directory, 2511 this=self._parse_var_or_string(), 2512 local=local, 2513 row_format=self._parse_row_format(match_row=True), 2514 ) 2515 else: 2516 if self._match(TokenType.OR): 2517 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2518 2519 self._match(TokenType.INTO) 2520 comments += ensure_list(self._prev_comments) 2521 self._match(TokenType.TABLE) 2522 is_function = self._match(TokenType.FUNCTION) 2523 2524 this = ( 2525 self._parse_table(schema=True, parse_partition=True) 2526 if not is_function 2527 else self._parse_function() 2528 ) 2529 2530 returning = self._parse_returning() 2531 2532 return self.expression( 2533 exp.Insert, 2534 comments=comments, 2535 hint=hint, 2536 is_function=is_function, 2537 this=this, 2538 stored=self._match_text_seq("STORED") and self._parse_stored(), 2539 by_name=self._match_text_seq("BY", "NAME"), 2540 exists=self._parse_exists(), 2541 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2542 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2543 conflict=self._parse_on_conflict(), 2544 returning=returning or self._parse_returning(), 2545 overwrite=overwrite, 2546 alternative=alternative, 2547 ignore=ignore, 2548 ) 2549 2550 def _parse_kill(self) -> exp.Kill: 2551 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2552 2553 return self.expression( 2554 exp.Kill, 2555 this=self._parse_primary(), 2556 kind=kind, 2557 ) 2558 2559 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2560 conflict = self._match_text_seq("ON", "CONFLICT") 2561 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2562 2563 if not conflict and not duplicate: 2564 return None 2565 2566 conflict_keys = None 2567 constraint = None 2568 2569 if conflict: 2570 if self._match_text_seq("ON", "CONSTRAINT"): 2571 constraint = self._parse_id_var() 2572 elif self._match(TokenType.L_PAREN): 2573 conflict_keys = self._parse_csv(self._parse_id_var) 2574 self._match_r_paren() 2575 2576 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2577 if self._prev.token_type == TokenType.UPDATE: 2578 self._match(TokenType.SET) 2579 expressions = self._parse_csv(self._parse_equality) 2580 else: 2581 expressions = None 2582 2583 return self.expression( 2584 exp.OnConflict, 2585 duplicate=duplicate, 2586 expressions=expressions, 2587 action=action, 2588 conflict_keys=conflict_keys, 2589 constraint=constraint, 2590 ) 2591 2592 def _parse_returning(self) -> t.Optional[exp.Returning]: 2593 if not self._match(TokenType.RETURNING): 2594 return None 2595 return self.expression( 2596 exp.Returning, 2597 expressions=self._parse_csv(self._parse_expression), 2598 into=self._match(TokenType.INTO) and self._parse_table_part(), 2599 ) 2600 2601 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2602 if not self._match(TokenType.FORMAT): 2603 return None 2604 return self._parse_row_format() 2605 2606 def 
_parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2607 index = self._index 2608 with_ = with_ or self._match_text_seq("WITH") 2609 2610 if not self._match(TokenType.SERDE_PROPERTIES): 2611 self._retreat(index) 2612 return None 2613 return self.expression( 2614 exp.SerdeProperties, 2615 **{ # type: ignore 2616 "expressions": self._parse_wrapped_properties(), 2617 "with": with_, 2618 }, 2619 ) 2620 2621 def _parse_row_format( 2622 self, match_row: bool = False 2623 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2624 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2625 return None 2626 2627 if self._match_text_seq("SERDE"): 2628 this = self._parse_string() 2629 2630 serde_properties = self._parse_serde_properties() 2631 2632 return self.expression( 2633 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2634 ) 2635 2636 self._match_text_seq("DELIMITED") 2637 2638 kwargs = {} 2639 2640 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2641 kwargs["fields"] = self._parse_string() 2642 if self._match_text_seq("ESCAPED", "BY"): 2643 kwargs["escaped"] = self._parse_string() 2644 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2645 kwargs["collection_items"] = self._parse_string() 2646 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2647 kwargs["map_keys"] = self._parse_string() 2648 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2649 kwargs["lines"] = self._parse_string() 2650 if self._match_text_seq("NULL", "DEFINED", "AS"): 2651 kwargs["null"] = self._parse_string() 2652 2653 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2654 2655 def _parse_load(self) -> exp.LoadData | exp.Command: 2656 if self._match_text_seq("DATA"): 2657 local = self._match_text_seq("LOCAL") 2658 self._match_text_seq("INPATH") 2659 inpath = self._parse_string() 2660 overwrite = self._match(TokenType.OVERWRITE) 2661 self._match_pair(TokenType.INTO, TokenType.TABLE) 2662 2663 return self.expression( 2664 exp.LoadData, 2665 this=self._parse_table(schema=True), 2666 local=local, 2667 overwrite=overwrite, 2668 inpath=inpath, 2669 partition=self._parse_partition(), 2670 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2671 serde=self._match_text_seq("SERDE") and self._parse_string(), 2672 ) 2673 return self._parse_as_command(self._prev) 2674 2675 def _parse_delete(self) -> exp.Delete: 2676 # This handles MySQL's "Multiple-Table Syntax" 2677 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2678 tables = None 2679 comments = self._prev_comments 2680 if not self._match(TokenType.FROM, advance=False): 2681 tables = self._parse_csv(self._parse_table) or None 2682 2683 returning = self._parse_returning() 2684 2685 return self.expression( 2686 exp.Delete, 2687 comments=comments, 2688 tables=tables, 2689 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2690 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2691 where=self._parse_where(), 2692 returning=returning or self._parse_returning(), 2693 limit=self._parse_limit(), 2694 ) 2695 2696 def _parse_update(self) -> exp.Update: 2697 comments = self._prev_comments 2698 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2699 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2700 returning = self._parse_returning() 2701 return self.expression( 2702 exp.Update, 2703 
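#
# [Illustrative aside -- not part of the parser source] _parse_on_conflict above
# normalizes both Postgres ON CONFLICT and MySQL ON DUPLICATE KEY into a single
# exp.OnConflict node. A rough sketch (the schema is hypothetical):
#
#     import sqlglot
#
#     ast = sqlglot.parse_one(
#         "INSERT INTO t (id) VALUES (1) ON CONFLICT (id) DO NOTHING",
#         read="postgres",
#     )
#     # ast.args.get("conflict") should be an exp.OnConflict with
#     # conflict_keys=[Identifier(id)] and no UPDATE expressions.
#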
comments=comments, 2704 **{ # type: ignore 2705 "this": this, 2706 "expressions": expressions, 2707 "from": self._parse_from(joins=True), 2708 "where": self._parse_where(), 2709 "returning": returning or self._parse_returning(), 2710 "order": self._parse_order(), 2711 "limit": self._parse_limit(), 2712 }, 2713 ) 2714 2715 def _parse_uncache(self) -> exp.Uncache: 2716 if not self._match(TokenType.TABLE): 2717 self.raise_error("Expecting TABLE after UNCACHE") 2718 2719 return self.expression( 2720 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2721 ) 2722 2723 def _parse_cache(self) -> exp.Cache: 2724 lazy = self._match_text_seq("LAZY") 2725 self._match(TokenType.TABLE) 2726 table = self._parse_table(schema=True) 2727 2728 options = [] 2729 if self._match_text_seq("OPTIONS"): 2730 self._match_l_paren() 2731 k = self._parse_string() 2732 self._match(TokenType.EQ) 2733 v = self._parse_string() 2734 options = [k, v] 2735 self._match_r_paren() 2736 2737 self._match(TokenType.ALIAS) 2738 return self.expression( 2739 exp.Cache, 2740 this=table, 2741 lazy=lazy, 2742 options=options, 2743 expression=self._parse_select(nested=True), 2744 ) 2745 2746 def _parse_partition(self) -> t.Optional[exp.Partition]: 2747 if not self._match(TokenType.PARTITION): 2748 return None 2749 2750 return self.expression( 2751 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2752 ) 2753 2754 def _parse_value(self) -> t.Optional[exp.Tuple]: 2755 if self._match(TokenType.L_PAREN): 2756 expressions = self._parse_csv(self._parse_expression) 2757 self._match_r_paren() 2758 return self.expression(exp.Tuple, expressions=expressions) 2759 2760 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2761 expression = self._parse_expression() 2762 if expression: 2763 return self.expression(exp.Tuple, expressions=[expression]) 2764 return None 2765 2766 def _parse_projections(self) -> t.List[exp.Expression]: 2767 return self._parse_expressions() 2768 2769 def _parse_select( 2770 self, 2771 nested: bool = False, 2772 table: bool = False, 2773 parse_subquery_alias: bool = True, 2774 parse_set_operation: bool = True, 2775 ) -> t.Optional[exp.Expression]: 2776 cte = self._parse_with() 2777 2778 if cte: 2779 this = self._parse_statement() 2780 2781 if not this: 2782 self.raise_error("Failed to parse any statement following CTE") 2783 return cte 2784 2785 if "with" in this.arg_types: 2786 this.set("with", cte) 2787 else: 2788 self.raise_error(f"{this.key} does not support CTE") 2789 this = cte 2790 2791 return this 2792 2793 # duckdb supports leading with FROM x 2794 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2795 2796 if self._match(TokenType.SELECT): 2797 comments = self._prev_comments 2798 2799 hint = self._parse_hint() 2800 2801 if self._next and not self._next.token_type == TokenType.DOT: 2802 all_ = self._match(TokenType.ALL) 2803 distinct = self._match_set(self.DISTINCT_TOKENS) 2804 else: 2805 all_, distinct = None, None 2806 2807 kind = ( 2808 self._match(TokenType.ALIAS) 2809 and self._match_texts(("STRUCT", "VALUE")) 2810 and self._prev.text.upper() 2811 ) 2812 2813 if distinct: 2814 distinct = self.expression( 2815 exp.Distinct, 2816 on=self._parse_value() if self._match(TokenType.ON) else None, 2817 ) 2818 2819 if all_ and distinct: 2820 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2821 2822 limit = self._parse_limit(top=True) 2823 projections = self._parse_projections() 2824 2825 
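#
# [Illustrative aside -- not part of the parser source] The from_-first handling
# above implements DuckDB's leading-FROM syntax: when a query starts with FROM and
# no SELECT follows, the parser synthesizes a `SELECT *`. A rough sketch:
#
#     import sqlglot
#
#     sqlglot.parse_one("FROM tbl", read="duckdb").sql()
#     # -> 'SELECT * FROM tbl'
#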
this = self.expression( 2826 exp.Select, 2827 kind=kind, 2828 hint=hint, 2829 distinct=distinct, 2830 expressions=projections, 2831 limit=limit, 2832 ) 2833 this.comments = comments 2834 2835 into = self._parse_into() 2836 if into: 2837 this.set("into", into) 2838 2839 if not from_: 2840 from_ = self._parse_from() 2841 2842 if from_: 2843 this.set("from", from_) 2844 2845 this = self._parse_query_modifiers(this) 2846 elif (table or nested) and self._match(TokenType.L_PAREN): 2847 if self._match(TokenType.PIVOT): 2848 this = self._parse_simplified_pivot() 2849 elif self._match(TokenType.FROM): 2850 this = exp.select("*").from_( 2851 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2852 ) 2853 else: 2854 this = ( 2855 self._parse_table() 2856 if table 2857 else self._parse_select(nested=True, parse_set_operation=False) 2858 ) 2859 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2860 2861 self._match_r_paren() 2862 2863 # We return early here so that the UNION isn't attached to the subquery by the 2864 # following call to _parse_set_operations, but instead becomes the parent node 2865 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2866 elif self._match(TokenType.VALUES, advance=False): 2867 this = self._parse_derived_table_values() 2868 elif from_: 2869 this = exp.select("*").from_(from_.this, copy=False) 2870 elif self._match(TokenType.SUMMARIZE): 2871 table = self._match(TokenType.TABLE) 2872 this = self._parse_select() or self._parse_string() or self._parse_table() 2873 return self.expression(exp.Summarize, this=this, table=table) 2874 elif self._match(TokenType.DESCRIBE): 2875 this = self._parse_describe() 2876 elif self._match_text_seq("STREAM"): 2877 this = self.expression(exp.Stream, this=self._parse_function()) 2878 else: 2879 this = None 2880 2881 return self._parse_set_operations(this) if parse_set_operation else this 2882 2883 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2884 if not skip_with_token and not self._match(TokenType.WITH): 2885 return None 2886 2887 comments = self._prev_comments 2888 recursive = self._match(TokenType.RECURSIVE) 2889 2890 expressions = [] 2891 while True: 2892 expressions.append(self._parse_cte()) 2893 2894 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2895 break 2896 else: 2897 self._match(TokenType.WITH) 2898 2899 return self.expression( 2900 exp.With, comments=comments, expressions=expressions, recursive=recursive 2901 ) 2902 2903 def _parse_cte(self) -> exp.CTE: 2904 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2905 if not alias or not alias.this: 2906 self.raise_error("Expected CTE to have alias") 2907 2908 self._match(TokenType.ALIAS) 2909 comments = self._prev_comments 2910 2911 if self._match_text_seq("NOT", "MATERIALIZED"): 2912 materialized = False 2913 elif self._match_text_seq("MATERIALIZED"): 2914 materialized = True 2915 else: 2916 materialized = None 2917 2918 return self.expression( 2919 exp.CTE, 2920 this=self._parse_wrapped(self._parse_statement), 2921 alias=alias, 2922 materialized=materialized, 2923 comments=comments, 2924 ) 2925 2926 def _parse_table_alias( 2927 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2928 ) -> t.Optional[exp.TableAlias]: 2929 any_token = self._match(TokenType.ALIAS) 2930 alias = ( 2931 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2932 or self._parse_string_as_identifier() 2933 ) 2934 2935 index = self._index 2936 if 
self._match(TokenType.L_PAREN): 2937 columns = self._parse_csv(self._parse_function_parameter) 2938 self._match_r_paren() if columns else self._retreat(index) 2939 else: 2940 columns = None 2941 2942 if not alias and not columns: 2943 return None 2944 2945 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2946 2947 # We bubble up comments from the Identifier to the TableAlias 2948 if isinstance(alias, exp.Identifier): 2949 table_alias.add_comments(alias.pop_comments()) 2950 2951 return table_alias 2952 2953 def _parse_subquery( 2954 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2955 ) -> t.Optional[exp.Subquery]: 2956 if not this: 2957 return None 2958 2959 return self.expression( 2960 exp.Subquery, 2961 this=this, 2962 pivots=self._parse_pivots(), 2963 alias=self._parse_table_alias() if parse_alias else None, 2964 ) 2965 2966 def _implicit_unnests_to_explicit(self, this: E) -> E: 2967 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2968 2969 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2970 for i, join in enumerate(this.args.get("joins") or []): 2971 table = join.this 2972 normalized_table = table.copy() 2973 normalized_table.meta["maybe_column"] = True 2974 normalized_table = _norm(normalized_table, dialect=self.dialect) 2975 2976 if isinstance(table, exp.Table) and not join.args.get("on"): 2977 if normalized_table.parts[0].name in refs: 2978 table_as_column = table.to_column() 2979 unnest = exp.Unnest(expressions=[table_as_column]) 2980 2981 # Table.to_column creates a parent Alias node that we want to convert to 2982 # a TableAlias and attach to the Unnest, so it matches the parser's output 2983 if isinstance(table.args.get("alias"), exp.TableAlias): 2984 table_as_column.replace(table_as_column.this) 2985 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2986 2987 table.replace(unnest) 2988 2989 refs.add(normalized_table.alias_or_name) 2990 2991 return this 2992 2993 def _parse_query_modifiers( 2994 self, this: t.Optional[exp.Expression] 2995 ) -> t.Optional[exp.Expression]: 2996 if isinstance(this, (exp.Query, exp.Table)): 2997 for join in self._parse_joins(): 2998 this.append("joins", join) 2999 for lateral in iter(self._parse_lateral, None): 3000 this.append("laterals", lateral) 3001 3002 while True: 3003 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3004 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3005 key, expression = parser(self) 3006 3007 if expression: 3008 this.set(key, expression) 3009 if key == "limit": 3010 offset = expression.args.pop("offset", None) 3011 3012 if offset: 3013 offset = exp.Offset(expression=offset) 3014 this.set("offset", offset) 3015 3016 limit_by_expressions = expression.expressions 3017 expression.set("expressions", None) 3018 offset.set("expressions", limit_by_expressions) 3019 continue 3020 break 3021 3022 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3023 this = self._implicit_unnests_to_explicit(this) 3024 3025 return this 3026 3027 def _parse_hint(self) -> t.Optional[exp.Hint]: 3028 if self._match(TokenType.HINT): 3029 hints = [] 3030 for hint in iter( 3031 lambda: self._parse_csv( 3032 lambda: self._parse_function() or self._parse_var(upper=True) 3033 ), 3034 [], 3035 ): 3036 hints.extend(hint) 3037 3038 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3039 self.raise_error("Expected */ after HINT") 3040 3041 return self.expression(exp.Hint, 
expressions=hints) 3042 3043 return None 3044 3045 def _parse_into(self) -> t.Optional[exp.Into]: 3046 if not self._match(TokenType.INTO): 3047 return None 3048 3049 temp = self._match(TokenType.TEMPORARY) 3050 unlogged = self._match_text_seq("UNLOGGED") 3051 self._match(TokenType.TABLE) 3052 3053 return self.expression( 3054 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3055 ) 3056 3057 def _parse_from( 3058 self, joins: bool = False, skip_from_token: bool = False 3059 ) -> t.Optional[exp.From]: 3060 if not skip_from_token and not self._match(TokenType.FROM): 3061 return None 3062 3063 return self.expression( 3064 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3065 ) 3066 3067 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3068 return self.expression( 3069 exp.MatchRecognizeMeasure, 3070 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3071 this=self._parse_expression(), 3072 ) 3073 3074 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3075 if not self._match(TokenType.MATCH_RECOGNIZE): 3076 return None 3077 3078 self._match_l_paren() 3079 3080 partition = self._parse_partition_by() 3081 order = self._parse_order() 3082 3083 measures = ( 3084 self._parse_csv(self._parse_match_recognize_measure) 3085 if self._match_text_seq("MEASURES") 3086 else None 3087 ) 3088 3089 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3090 rows = exp.var("ONE ROW PER MATCH") 3091 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3092 text = "ALL ROWS PER MATCH" 3093 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3094 text += " SHOW EMPTY MATCHES" 3095 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3096 text += " OMIT EMPTY MATCHES" 3097 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3098 text += " WITH UNMATCHED ROWS" 3099 rows = exp.var(text) 3100 else: 3101 rows = None 3102 3103 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3104 text = "AFTER MATCH SKIP" 3105 if self._match_text_seq("PAST", "LAST", "ROW"): 3106 text += " PAST LAST ROW" 3107 elif self._match_text_seq("TO", "NEXT", "ROW"): 3108 text += " TO NEXT ROW" 3109 elif self._match_text_seq("TO", "FIRST"): 3110 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3111 elif self._match_text_seq("TO", "LAST"): 3112 text += f" TO LAST {self._advance_any().text}" # type: ignore 3113 after = exp.var(text) 3114 else: 3115 after = None 3116 3117 if self._match_text_seq("PATTERN"): 3118 self._match_l_paren() 3119 3120 if not self._curr: 3121 self.raise_error("Expecting )", self._curr) 3122 3123 paren = 1 3124 start = self._curr 3125 3126 while self._curr and paren > 0: 3127 if self._curr.token_type == TokenType.L_PAREN: 3128 paren += 1 3129 if self._curr.token_type == TokenType.R_PAREN: 3130 paren -= 1 3131 3132 end = self._prev 3133 self._advance() 3134 3135 if paren > 0: 3136 self.raise_error("Expecting )", self._curr) 3137 3138 pattern = exp.var(self._find_sql(start, end)) 3139 else: 3140 pattern = None 3141 3142 define = ( 3143 self._parse_csv(self._parse_name_as_expression) 3144 if self._match_text_seq("DEFINE") 3145 else None 3146 ) 3147 3148 self._match_r_paren() 3149 3150 return self.expression( 3151 exp.MatchRecognize, 3152 partition_by=partition, 3153 order=order, 3154 measures=measures, 3155 rows=rows, 3156 after=after, 3157 pattern=pattern, 3158 define=define, 3159 alias=self._parse_table_alias(), 3160 ) 3161 3162 def _parse_lateral(self) -> 
t.Optional[exp.Lateral]: 3163 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3164 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3165 cross_apply = False 3166 3167 if cross_apply is not None: 3168 this = self._parse_select(table=True) 3169 view = None 3170 outer = None 3171 elif self._match(TokenType.LATERAL): 3172 this = self._parse_select(table=True) 3173 view = self._match(TokenType.VIEW) 3174 outer = self._match(TokenType.OUTER) 3175 else: 3176 return None 3177 3178 if not this: 3179 this = ( 3180 self._parse_unnest() 3181 or self._parse_function() 3182 or self._parse_id_var(any_token=False) 3183 ) 3184 3185 while self._match(TokenType.DOT): 3186 this = exp.Dot( 3187 this=this, 3188 expression=self._parse_function() or self._parse_id_var(any_token=False), 3189 ) 3190 3191 if view: 3192 table = self._parse_id_var(any_token=False) 3193 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3194 table_alias: t.Optional[exp.TableAlias] = self.expression( 3195 exp.TableAlias, this=table, columns=columns 3196 ) 3197 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3198 # We move the alias from the lateral's child node to the lateral itself 3199 table_alias = this.args["alias"].pop() 3200 else: 3201 table_alias = self._parse_table_alias() 3202 3203 return self.expression( 3204 exp.Lateral, 3205 this=this, 3206 view=view, 3207 outer=outer, 3208 alias=table_alias, 3209 cross_apply=cross_apply, 3210 ) 3211 3212 def _parse_join_parts( 3213 self, 3214 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3215 return ( 3216 self._match_set(self.JOIN_METHODS) and self._prev, 3217 self._match_set(self.JOIN_SIDES) and self._prev, 3218 self._match_set(self.JOIN_KINDS) and self._prev, 3219 ) 3220 3221 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3222 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3223 this = self._parse_column() 3224 if isinstance(this, exp.Column): 3225 return this.this 3226 return this 3227 3228 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3229 3230 def _parse_join( 3231 self, skip_join_token: bool = False, parse_bracket: bool = False 3232 ) -> t.Optional[exp.Join]: 3233 if self._match(TokenType.COMMA): 3234 return self.expression(exp.Join, this=self._parse_table()) 3235 3236 index = self._index 3237 method, side, kind = self._parse_join_parts() 3238 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3239 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3240 3241 if not skip_join_token and not join: 3242 self._retreat(index) 3243 kind = None 3244 method = None 3245 side = None 3246 3247 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3248 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3249 3250 if not skip_join_token and not join and not outer_apply and not cross_apply: 3251 return None 3252 3253 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3254 3255 if method: 3256 kwargs["method"] = method.text 3257 if side: 3258 kwargs["side"] = side.text 3259 if kind: 3260 kwargs["kind"] = kind.text 3261 if hint: 3262 kwargs["hint"] = hint 3263 3264 if self._match(TokenType.MATCH_CONDITION): 3265 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3266 3267 if self._match(TokenType.ON): 3268 kwargs["on"] = self._parse_assignment() 3269 elif 
self._match(TokenType.USING): 3270 kwargs["using"] = self._parse_using_identifiers() 3271 elif ( 3272 not (outer_apply or cross_apply) 3273 and not isinstance(kwargs["this"], exp.Unnest) 3274 and not (kind and kind.token_type == TokenType.CROSS) 3275 ): 3276 index = self._index 3277 joins: t.Optional[list] = list(self._parse_joins()) 3278 3279 if joins and self._match(TokenType.ON): 3280 kwargs["on"] = self._parse_assignment() 3281 elif joins and self._match(TokenType.USING): 3282 kwargs["using"] = self._parse_using_identifiers() 3283 else: 3284 joins = None 3285 self._retreat(index) 3286 3287 kwargs["this"].set("joins", joins if joins else None) 3288 3289 comments = [c for token in (method, side, kind) if token for c in token.comments] 3290 return self.expression(exp.Join, comments=comments, **kwargs) 3291 3292 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3293 this = self._parse_assignment() 3294 3295 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3296 return this 3297 3298 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3299 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3300 3301 return this 3302 3303 def _parse_index_params(self) -> exp.IndexParameters: 3304 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3305 3306 if self._match(TokenType.L_PAREN, advance=False): 3307 columns = self._parse_wrapped_csv(self._parse_with_operator) 3308 else: 3309 columns = None 3310 3311 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3312 partition_by = self._parse_partition_by() 3313 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3314 tablespace = ( 3315 self._parse_var(any_token=True) 3316 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3317 else None 3318 ) 3319 where = self._parse_where() 3320 3321 on = self._parse_field() if self._match(TokenType.ON) else None 3322 3323 return self.expression( 3324 exp.IndexParameters, 3325 using=using, 3326 columns=columns, 3327 include=include, 3328 partition_by=partition_by, 3329 where=where, 3330 with_storage=with_storage, 3331 tablespace=tablespace, 3332 on=on, 3333 ) 3334 3335 def _parse_index( 3336 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3337 ) -> t.Optional[exp.Index]: 3338 if index or anonymous: 3339 unique = None 3340 primary = None 3341 amp = None 3342 3343 self._match(TokenType.ON) 3344 self._match(TokenType.TABLE) # hive 3345 table = self._parse_table_parts(schema=True) 3346 else: 3347 unique = self._match(TokenType.UNIQUE) 3348 primary = self._match_text_seq("PRIMARY") 3349 amp = self._match_text_seq("AMP") 3350 3351 if not self._match(TokenType.INDEX): 3352 return None 3353 3354 index = self._parse_id_var() 3355 table = None 3356 3357 params = self._parse_index_params() 3358 3359 return self.expression( 3360 exp.Index, 3361 this=index, 3362 table=table, 3363 unique=unique, 3364 primary=primary, 3365 amp=amp, 3366 params=params, 3367 ) 3368 3369 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3370 hints: t.List[exp.Expression] = [] 3371 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3372 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3373 hints.append( 3374 self.expression( 3375 exp.WithTableHint, 3376 expressions=self._parse_csv( 3377 lambda: self._parse_function() or self._parse_var(any_token=True) 3378 ), 3379 ) 3380 ) 3381 
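#
# [Illustrative aside -- not part of the parser source] _parse_join above also
# accepts a bare comma as a join and a USING column list instead of ON. A rough
# sketch (table names are hypothetical):
#
#     import sqlglot
#
#     ast = sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)")
#     join = ast.args["joins"][0]  # exp.Join
#     # join.args.get("using") should be a list containing Identifier(this=id).
#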
self._match_r_paren() 3382 else: 3383 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3384 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3385 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3386 3387 self._match_set((TokenType.INDEX, TokenType.KEY)) 3388 if self._match(TokenType.FOR): 3389 hint.set("target", self._advance_any() and self._prev.text.upper()) 3390 3391 hint.set("expressions", self._parse_wrapped_id_vars()) 3392 hints.append(hint) 3393 3394 return hints or None 3395 3396 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3397 return ( 3398 (not schema and self._parse_function(optional_parens=False)) 3399 or self._parse_id_var(any_token=False) 3400 or self._parse_string_as_identifier() 3401 or self._parse_placeholder() 3402 ) 3403 3404 def _parse_table_parts( 3405 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3406 ) -> exp.Table: 3407 catalog = None 3408 db = None 3409 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3410 3411 while self._match(TokenType.DOT): 3412 if catalog: 3413 # This allows nesting the table in arbitrarily many dot expressions if needed 3414 table = self.expression( 3415 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3416 ) 3417 else: 3418 catalog = db 3419 db = table 3420 # "" used for tsql FROM a..b case 3421 table = self._parse_table_part(schema=schema) or "" 3422 3423 if ( 3424 wildcard 3425 and self._is_connected() 3426 and (isinstance(table, exp.Identifier) or not table) 3427 and self._match(TokenType.STAR) 3428 ): 3429 if isinstance(table, exp.Identifier): 3430 table.args["this"] += "*" 3431 else: 3432 table = exp.Identifier(this="*") 3433 3434 # We bubble up comments from the Identifier to the Table 3435 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3436 3437 if is_db_reference: 3438 catalog = db 3439 db = table 3440 table = None 3441 3442 if not table and not is_db_reference: 3443 self.raise_error(f"Expected table name but got {self._curr}") 3444 if not db and is_db_reference: 3445 self.raise_error(f"Expected database name but got {self._curr}") 3446 3447 table = self.expression( 3448 exp.Table, 3449 comments=comments, 3450 this=table, 3451 db=db, 3452 catalog=catalog, 3453 ) 3454 3455 changes = self._parse_changes() 3456 if changes: 3457 table.set("changes", changes) 3458 3459 at_before = self._parse_historical_data() 3460 if at_before: 3461 table.set("when", at_before) 3462 3463 pivots = self._parse_pivots() 3464 if pivots: 3465 table.set("pivots", pivots) 3466 3467 return table 3468 3469 def _parse_table( 3470 self, 3471 schema: bool = False, 3472 joins: bool = False, 3473 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3474 parse_bracket: bool = False, 3475 is_db_reference: bool = False, 3476 parse_partition: bool = False, 3477 ) -> t.Optional[exp.Expression]: 3478 lateral = self._parse_lateral() 3479 if lateral: 3480 return lateral 3481 3482 unnest = self._parse_unnest() 3483 if unnest: 3484 return unnest 3485 3486 values = self._parse_derived_table_values() 3487 if values: 3488 return values 3489 3490 subquery = self._parse_select(table=True) 3491 if subquery: 3492 if not subquery.args.get("pivots"): 3493 subquery.set("pivots", self._parse_pivots()) 3494 return subquery 3495 3496 bracket = parse_bracket and self._parse_bracket(None) 3497 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3498 3499 rows_from = 
self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3500 self._parse_table 3501 ) 3502 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3503 3504 only = self._match(TokenType.ONLY) 3505 3506 this = t.cast( 3507 exp.Expression, 3508 bracket 3509 or rows_from 3510 or self._parse_bracket( 3511 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3512 ), 3513 ) 3514 3515 if only: 3516 this.set("only", only) 3517 3518 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3519 self._match_text_seq("*") 3520 3521 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3522 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3523 this.set("partition", self._parse_partition()) 3524 3525 if schema: 3526 return self._parse_schema(this=this) 3527 3528 version = self._parse_version() 3529 3530 if version: 3531 this.set("version", version) 3532 3533 if self.dialect.ALIAS_POST_TABLESAMPLE: 3534 table_sample = self._parse_table_sample() 3535 3536 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3537 if alias: 3538 this.set("alias", alias) 3539 3540 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3541 return self.expression( 3542 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3543 ) 3544 3545 this.set("hints", self._parse_table_hints()) 3546 3547 if not this.args.get("pivots"): 3548 this.set("pivots", self._parse_pivots()) 3549 3550 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3551 table_sample = self._parse_table_sample() 3552 3553 if table_sample: 3554 table_sample.set("this", this) 3555 this = table_sample 3556 3557 if joins: 3558 for join in self._parse_joins(): 3559 this.append("joins", join) 3560 3561 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3562 this.set("ordinality", True) 3563 this.set("alias", self._parse_table_alias()) 3564 3565 return this 3566 3567 def _parse_version(self) -> t.Optional[exp.Version]: 3568 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3569 this = "TIMESTAMP" 3570 elif self._match(TokenType.VERSION_SNAPSHOT): 3571 this = "VERSION" 3572 else: 3573 return None 3574 3575 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3576 kind = self._prev.text.upper() 3577 start = self._parse_bitwise() 3578 self._match_texts(("TO", "AND")) 3579 end = self._parse_bitwise() 3580 expression: t.Optional[exp.Expression] = self.expression( 3581 exp.Tuple, expressions=[start, end] 3582 ) 3583 elif self._match_text_seq("CONTAINED", "IN"): 3584 kind = "CONTAINED IN" 3585 expression = self.expression( 3586 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3587 ) 3588 elif self._match(TokenType.ALL): 3589 kind = "ALL" 3590 expression = None 3591 else: 3592 self._match_text_seq("AS", "OF") 3593 kind = "AS OF" 3594 expression = self._parse_type() 3595 3596 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3597 3598 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3599 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3600 index = self._index 3601 historical_data = None 3602 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3603 this = self._prev.text.upper() 3604 kind = ( 3605 self._match(TokenType.L_PAREN) 3606 and self._match_texts(self.HISTORICAL_DATA_KIND) 3607 and self._prev.text.upper() 3608 ) 3609 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3610 3611 
    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
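    # Illustrative sketch (not part of the source): in BigQuery, where
    # UNNEST_COLUMN_ONLY is set, "UNNEST(arr) AS x WITH OFFSET AS off" roughly
    # yields Unnest(expressions=[arr], alias=TableAlias(columns=[x]), offset=off);
    # without an explicit offset alias, the identifier "offset" is assumed.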
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )
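    # Illustrative sketch (not part of the source): "TABLESAMPLE SYSTEM (10 PERCENT)
    # REPEATABLE (42)" roughly becomes TableSample(method=SYSTEM, percent=10, seed=42);
    # whether a bare number means rows or a percentage is decided by
    # dialect.TABLESAMPLE_SIZE_IS_PERCENT.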
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order())
        else:
            aliased_expressions = self._parse_csv(_parse_aliased_expression)
            expr = self.expression(exp.In, this=value, expressions=aliased_expressions)

        self._match_r_paren()
        return expr

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]
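    # Illustrative sketch (not part of the source): for
    # "PIVOT (SUM(x) AS s FOR k IN ('a', 'b'))" the synthesized output column
    # names combine each IN value with each aggregation alias, e.g. roughly
    # "a_s"/"b_s", or "s_a"/"s_b" when PREFIXED_PIVOT_COLUMNS is set.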
    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                else self._parse_assignment()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore
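    # Illustrative sketch (not part of the source): "GROUP BY ALL" sets
    # Group(all=True), while "GROUP BY a, ROLLUP (b)" collects a under
    # "expressions" and b under "rollup"; WITH ROLLUP/CUBE/TOTALS are the
    # flag-style forms handled by the with_ branch above.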
    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
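    # Illustrative sketch (not part of the source): under the default
    # "nulls_are_small" ordering, a plain "ORDER BY x" is normalized to
    # nulls_first=True while "ORDER BY x DESC" keeps nulls_first=False;
    # an explicit NULLS FIRST/LAST always takes precedence.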
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
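    # Illustrative sketch (not part of the source): the MySQL shorthand
    # "LIMIT 5, 10" parses as Limit(offset=5, expression=10), and
    # "FETCH FIRST 3 ROWS ONLY" becomes Fetch(direction='FIRST', count=3).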
    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
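    # Illustrative sketch (not part of the source): expression parsing descends
    # a fixed precedence ladder, roughly assignment -> OR -> AND -> equality ->
    # comparison -> range -> bitwise -> +/- -> *,/ -> exponent -> unary ->
    # primary, so "a OR b AND c" parses as Or(this=a, expression=And(b, c)).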
    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
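    # Illustrative sketch (not part of the source): INTERVAL 5 day,
    # INTERVAL '5' day and INTERVAL '5 day' all normalize to
    # Interval(this='5', unit=Var(DAY)), and "INTERVAL '1' day '2' hour"
    # becomes an exp.Add summing two intervals.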
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
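    # Illustrative sketch (not part of the source): for "DATE '2024-01-01'",
    # _parse_type recognizes the DATE type, then either delegates to a
    # TYPE_LITERAL_PARSERS entry or falls back to
    # Cast(this='2024-01-01', to=DataType(DATE)).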
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        return self._parse_types()
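    # Illustrative sketch (not part of the source): in Snowflake, "col:a.b::int"
    # roughly parses as Cast(JSONExtract(col, <path 'a.b'>, variant_extract=True), INT);
    # the :: casts are peeled off the path and re-applied around the extraction.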
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this
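    # Illustrative sketch (not part of the source): each matched dot shifts the
    # parts of a Column one slot left, so "a.b.c" accumulates into
    # Column(this=c, table=b, db=a); once the catalog slot is filled, further
    # parts become exp.Dot nodes instead.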
self._next.text.upper() == "FN" 4981 ): 4982 self._advance(2) 4983 fn_syntax = True 4984 4985 func = self._parse_function_call( 4986 functions=functions, 4987 anonymous=anonymous, 4988 optional_parens=optional_parens, 4989 any_token=any_token, 4990 ) 4991 4992 if fn_syntax: 4993 self._match(TokenType.R_BRACE) 4994 4995 return func 4996 4997 def _parse_function_call( 4998 self, 4999 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5000 anonymous: bool = False, 5001 optional_parens: bool = True, 5002 any_token: bool = False, 5003 ) -> t.Optional[exp.Expression]: 5004 if not self._curr: 5005 return None 5006 5007 comments = self._curr.comments 5008 token_type = self._curr.token_type 5009 this = self._curr.text 5010 upper = this.upper() 5011 5012 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5013 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5014 self._advance() 5015 return self._parse_window(parser(self)) 5016 5017 if not self._next or self._next.token_type != TokenType.L_PAREN: 5018 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5019 self._advance() 5020 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5021 5022 return None 5023 5024 if any_token: 5025 if token_type in self.RESERVED_TOKENS: 5026 return None 5027 elif token_type not in self.FUNC_TOKENS: 5028 return None 5029 5030 self._advance(2) 5031 5032 parser = self.FUNCTION_PARSERS.get(upper) 5033 if parser and not anonymous: 5034 this = parser(self) 5035 else: 5036 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5037 5038 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5039 this = self.expression(subquery_predicate, this=self._parse_select()) 5040 self._match_r_paren() 5041 return this 5042 5043 if functions is None: 5044 functions = self.FUNCTIONS 5045 5046 function = functions.get(upper) 5047 5048 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5049 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5050 5051 if alias: 5052 args = self._kv_to_prop_eq(args) 5053 5054 if function and not anonymous: 5055 if "dialect" in function.__code__.co_varnames: 5056 func = function(args, dialect=self.dialect) 5057 else: 5058 func = function(args) 5059 5060 func = self.validate_expression(func, args) 5061 if not self.dialect.NORMALIZE_FUNCTIONS: 5062 func.meta["name"] = this 5063 5064 this = func 5065 else: 5066 if token_type == TokenType.IDENTIFIER: 5067 this = exp.Identifier(this=this, quoted=True) 5068 this = self.expression(exp.Anonymous, this=this, expressions=args) 5069 5070 if isinstance(this, exp.Expression): 5071 this.add_comments(comments) 5072 5073 self._match_r_paren(this) 5074 return self._parse_window(this) 5075 5076 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5077 transformed = [] 5078 5079 for e in expressions: 5080 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5081 if isinstance(e, exp.Alias): 5082 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5083 5084 if not isinstance(e, exp.PropertyEQ): 5085 e = self.expression( 5086 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5087 ) 5088 5089 if isinstance(e.this, exp.Column): 5090 e.this.replace(e.this.this) 5091 5092 transformed.append(e) 5093 5094 return transformed 5095 5096 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5097 return self._parse_column_def(self._parse_id_var()) 5098 5099 def _parse_user_defined_function( 
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())
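    # Illustrative sketch (not part of the source): a call to an unknown name,
    # e.g. MY_UDF(1), falls through to Anonymous(this='MY_UDF', expressions=[1]),
    # while a known name is built by its FUNCTIONS entry and then checked via
    # validate_expression.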
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
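    # Illustrative sketch (not part of the source): a definition like
    # "x INT NOT NULL" roughly becomes ColumnDef(this=x, kind=DataType(INT),
    # constraints=[ColumnConstraint(kind=NotNullColumnConstraint())]).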
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5270 ) 5271 else: 5272 self._match_text_seq("ALWAYS") 5273 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5274 5275 self._match(TokenType.ALIAS) 5276 5277 if self._match_text_seq("ROW"): 5278 start = self._match_text_seq("START") 5279 if not start: 5280 self._match(TokenType.END) 5281 hidden = self._match_text_seq("HIDDEN") 5282 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5283 5284 identity = self._match_text_seq("IDENTITY") 5285 5286 if self._match(TokenType.L_PAREN): 5287 if self._match(TokenType.START_WITH): 5288 this.set("start", self._parse_bitwise()) 5289 if self._match_text_seq("INCREMENT", "BY"): 5290 this.set("increment", self._parse_bitwise()) 5291 if self._match_text_seq("MINVALUE"): 5292 this.set("minvalue", self._parse_bitwise()) 5293 if self._match_text_seq("MAXVALUE"): 5294 this.set("maxvalue", self._parse_bitwise()) 5295 5296 if self._match_text_seq("CYCLE"): 5297 this.set("cycle", True) 5298 elif self._match_text_seq("NO", "CYCLE"): 5299 this.set("cycle", False) 5300 5301 if not identity: 5302 this.set("expression", self._parse_range()) 5303 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5304 args = self._parse_csv(self._parse_bitwise) 5305 this.set("start", seq_get(args, 0)) 5306 this.set("increment", seq_get(args, 1)) 5307 5308 self._match_r_paren() 5309 5310 return this 5311 5312 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5313 self._match_text_seq("LENGTH") 5314 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5315 5316 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5317 if self._match_text_seq("NULL"): 5318 return self.expression(exp.NotNullColumnConstraint) 5319 if self._match_text_seq("CASESPECIFIC"): 5320 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5321 if self._match_text_seq("FOR", "REPLICATION"): 5322 return self.expression(exp.NotForReplicationColumnConstraint) 5323 return None 5324 5325 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5326 if self._match(TokenType.CONSTRAINT): 5327 this = self._parse_id_var() 5328 else: 5329 this = None 5330 5331 if self._match_texts(self.CONSTRAINT_PARSERS): 5332 return self.expression( 5333 exp.ColumnConstraint, 5334 this=this, 5335 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5336 ) 5337 5338 return this 5339 5340 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5341 if not self._match(TokenType.CONSTRAINT): 5342 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5343 5344 return self.expression( 5345 exp.Constraint, 5346 this=self._parse_id_var(), 5347 expressions=self._parse_unnamed_constraints(), 5348 ) 5349 5350 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5351 constraints = [] 5352 while True: 5353 constraint = self._parse_unnamed_constraint() or self._parse_function() 5354 if not constraint: 5355 break 5356 constraints.append(constraint) 5357 5358 return constraints 5359 5360 def _parse_unnamed_constraint( 5361 self, constraints: t.Optional[t.Collection[str]] = None 5362 ) -> t.Optional[exp.Expression]: 5363 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5364 constraints or self.CONSTRAINT_PARSERS 5365 ): 5366 return None 5367 5368 constraint = self._prev.text.upper() 5369 if constraint not in self.CONSTRAINT_PARSERS: 5370 
self.raise_error(f"No parser found for schema constraint {constraint}.") 5371 5372 return self.CONSTRAINT_PARSERS[constraint](self) 5373 5374 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5375 return self._parse_id_var(any_token=False) 5376 5377 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5378 self._match_text_seq("KEY") 5379 return self.expression( 5380 exp.UniqueColumnConstraint, 5381 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5382 this=self._parse_schema(self._parse_unique_key()), 5383 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5384 on_conflict=self._parse_on_conflict(), 5385 ) 5386 5387 def _parse_key_constraint_options(self) -> t.List[str]: 5388 options = [] 5389 while True: 5390 if not self._curr: 5391 break 5392 5393 if self._match(TokenType.ON): 5394 action = None 5395 on = self._advance_any() and self._prev.text 5396 5397 if self._match_text_seq("NO", "ACTION"): 5398 action = "NO ACTION" 5399 elif self._match_text_seq("CASCADE"): 5400 action = "CASCADE" 5401 elif self._match_text_seq("RESTRICT"): 5402 action = "RESTRICT" 5403 elif self._match_pair(TokenType.SET, TokenType.NULL): 5404 action = "SET NULL" 5405 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5406 action = "SET DEFAULT" 5407 else: 5408 self.raise_error("Invalid key constraint") 5409 5410 options.append(f"ON {on} {action}") 5411 else: 5412 var = self._parse_var_from_options( 5413 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5414 ) 5415 if not var: 5416 break 5417 options.append(var.name) 5418 5419 return options 5420 5421 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5422 if match and not self._match(TokenType.REFERENCES): 5423 return None 5424 5425 expressions = None 5426 this = self._parse_table(schema=True) 5427 options = self._parse_key_constraint_options() 5428 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5429 5430 def _parse_foreign_key(self) -> exp.ForeignKey: 5431 expressions = self._parse_wrapped_id_vars() 5432 reference = self._parse_references() 5433 options = {} 5434 5435 while self._match(TokenType.ON): 5436 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5437 self.raise_error("Expected DELETE or UPDATE") 5438 5439 kind = self._prev.text.lower() 5440 5441 if self._match_text_seq("NO", "ACTION"): 5442 action = "NO ACTION" 5443 elif self._match(TokenType.SET): 5444 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5445 action = "SET " + self._prev.text.upper() 5446 else: 5447 self._advance() 5448 action = self._prev.text.upper() 5449 5450 options[kind] = action 5451 5452 return self.expression( 5453 exp.ForeignKey, 5454 expressions=expressions, 5455 reference=reference, 5456 **options, # type: ignore 5457 ) 5458 5459 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5460 return self._parse_field() 5461 5462 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5463 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5464 self._retreat(self._index - 1) 5465 return None 5466 5467 id_vars = self._parse_wrapped_id_vars() 5468 return self.expression( 5469 exp.PeriodForSystemTimeConstraint, 5470 this=seq_get(id_vars, 0), 5471 expression=seq_get(id_vars, 1), 5472 ) 5473 5474 def _parse_primary_key( 5475 self, wrapped_optional: bool = False, in_props: bool = False 5476 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5477 desc = ( 5478 self._match_set((TokenType.ASC, 
TokenType.DESC)) 5479 and self._prev.token_type == TokenType.DESC 5480 ) 5481 5482 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5483 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5484 5485 expressions = self._parse_wrapped_csv( 5486 self._parse_primary_key_part, optional=wrapped_optional 5487 ) 5488 options = self._parse_key_constraint_options() 5489 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5490 5491 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5492 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5493 5494 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5495 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5496 return this 5497 5498 bracket_kind = self._prev.token_type 5499 expressions = self._parse_csv( 5500 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5501 ) 5502 5503 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5504 self.raise_error("Expected ]") 5505 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5506 self.raise_error("Expected }") 5507 5508 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5509 if bracket_kind == TokenType.L_BRACE: 5510 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5511 elif not this: 5512 this = build_array_constructor( 5513 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5514 ) 5515 else: 5516 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5517 if constructor_type: 5518 return build_array_constructor( 5519 constructor_type, 5520 args=expressions, 5521 bracket_kind=bracket_kind, 5522 dialect=self.dialect, 5523 ) 5524 5525 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5526 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5527 5528 self._add_comments(this) 5529 return self._parse_bracket(this) 5530 5531 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5532 if self._match(TokenType.COLON): 5533 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5534 return this 5535 5536 def _parse_case(self) -> t.Optional[exp.Expression]: 5537 ifs = [] 5538 default = None 5539 5540 comments = self._prev_comments 5541 expression = self._parse_assignment() 5542 5543 while self._match(TokenType.WHEN): 5544 this = self._parse_assignment() 5545 self._match(TokenType.THEN) 5546 then = self._parse_assignment() 5547 ifs.append(self.expression(exp.If, this=this, true=then)) 5548 5549 if self._match(TokenType.ELSE): 5550 default = self._parse_assignment() 5551 5552 if not self._match(TokenType.END): 5553 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5554 default = exp.column("interval") 5555 else: 5556 self.raise_error("Expected END after CASE", self._prev) 5557 5558 return self.expression( 5559 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5560 ) 5561 5562 def _parse_if(self) -> t.Optional[exp.Expression]: 5563 if self._match(TokenType.L_PAREN): 5564 args = self._parse_csv(self._parse_assignment) 5565 this = self.validate_expression(exp.If.from_arg_list(args), args) 5566 self._match_r_paren() 5567 else: 5568 index = self._index - 1 5569 5570 if self.NO_PAREN_IF_COMMANDS and index == 
0: 5571 return self._parse_as_command(self._prev) 5572 5573 condition = self._parse_assignment() 5574 5575 if not condition: 5576 self._retreat(index) 5577 return None 5578 5579 self._match(TokenType.THEN) 5580 true = self._parse_assignment() 5581 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5582 self._match(TokenType.END) 5583 this = self.expression(exp.If, this=condition, true=true, false=false) 5584 5585 return this 5586 5587 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5588 if not self._match_text_seq("VALUE", "FOR"): 5589 self._retreat(self._index - 1) 5590 return None 5591 5592 return self.expression( 5593 exp.NextValueFor, 5594 this=self._parse_column(), 5595 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5596 ) 5597 5598 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5599 this = self._parse_function() or self._parse_var_or_string(upper=True) 5600 5601 if self._match(TokenType.FROM): 5602 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5603 5604 if not self._match(TokenType.COMMA): 5605 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5606 5607 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5608 5609 def _parse_gap_fill(self) -> exp.GapFill: 5610 self._match(TokenType.TABLE) 5611 this = self._parse_table() 5612 5613 self._match(TokenType.COMMA) 5614 args = [this, *self._parse_csv(self._parse_lambda)] 5615 5616 gap_fill = exp.GapFill.from_arg_list(args) 5617 return self.validate_expression(gap_fill, args) 5618 5619 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5620 this = self._parse_assignment() 5621 5622 if not self._match(TokenType.ALIAS): 5623 if self._match(TokenType.COMMA): 5624 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5625 5626 self.raise_error("Expected AS after CAST") 5627 5628 fmt = None 5629 to = self._parse_types() 5630 5631 if self._match(TokenType.FORMAT): 5632 fmt_string = self._parse_string() 5633 fmt = self._parse_at_time_zone(fmt_string) 5634 5635 if not to: 5636 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5637 if to.this in exp.DataType.TEMPORAL_TYPES: 5638 this = self.expression( 5639 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5640 this=this, 5641 format=exp.Literal.string( 5642 format_time( 5643 fmt_string.this if fmt_string else "", 5644 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5645 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5646 ) 5647 ), 5648 safe=safe, 5649 ) 5650 5651 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5652 this.set("zone", fmt.args["zone"]) 5653 return this 5654 elif not to: 5655 self.raise_error("Expected TYPE after CAST") 5656 elif isinstance(to, exp.Identifier): 5657 to = exp.DataType.build(to.name, udt=True) 5658 elif to.this == exp.DataType.Type.CHAR: 5659 if self._match(TokenType.CHARACTER_SET): 5660 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5661 5662 return self.expression( 5663 exp.Cast if strict else exp.TryCast, 5664 this=this, 5665 to=to, 5666 format=fmt, 5667 safe=safe, 5668 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5669 ) 5670 5671 def _parse_string_agg(self) -> exp.Expression: 5672 if self._match(TokenType.DISTINCT): 5673 args: t.List[t.Optional[exp.Expression]] = [ 5674 self.expression(exp.Distinct, 
expressions=[self._parse_assignment()]) 5675 ] 5676 if self._match(TokenType.COMMA): 5677 args.extend(self._parse_csv(self._parse_assignment)) 5678 else: 5679 args = self._parse_csv(self._parse_assignment) # type: ignore 5680 5681 index = self._index 5682 if not self._match(TokenType.R_PAREN) and args: 5683 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5684 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5685 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5686 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5687 5688 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5689 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5690 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5691 if not self._match_text_seq("WITHIN", "GROUP"): 5692 self._retreat(index) 5693 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5694 5695 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5696 order = self._parse_order(this=seq_get(args, 0)) 5697 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5698 5699 def _parse_convert( 5700 self, strict: bool, safe: t.Optional[bool] = None 5701 ) -> t.Optional[exp.Expression]: 5702 this = self._parse_bitwise() 5703 5704 if self._match(TokenType.USING): 5705 to: t.Optional[exp.Expression] = self.expression( 5706 exp.CharacterSet, this=self._parse_var() 5707 ) 5708 elif self._match(TokenType.COMMA): 5709 to = self._parse_types() 5710 else: 5711 to = None 5712 5713 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5714 5715 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5716 """ 5717 There are generally two variants of the DECODE function: 5718 5719 - DECODE(bin, charset) 5720 - DECODE(expression, search, result [, search, result] ... [, default]) 5721 5722 The second variant will always be parsed into a CASE expression. Note that NULL 5723 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5724 instead of relying on pattern matching. 
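        For example, the intended rewrite (an illustrative sketch; the exact
        SQL produced depends on the target dialect's generator) is:

            DECODE(x, 1, 'one', NULL, 'none', 'other')
            -> CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END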
5725 """ 5726 args = self._parse_csv(self._parse_assignment) 5727 5728 if len(args) < 3: 5729 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5730 5731 expression, *expressions = args 5732 if not expression: 5733 return None 5734 5735 ifs = [] 5736 for search, result in zip(expressions[::2], expressions[1::2]): 5737 if not search or not result: 5738 return None 5739 5740 if isinstance(search, exp.Literal): 5741 ifs.append( 5742 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5743 ) 5744 elif isinstance(search, exp.Null): 5745 ifs.append( 5746 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5747 ) 5748 else: 5749 cond = exp.or_( 5750 exp.EQ(this=expression.copy(), expression=search), 5751 exp.and_( 5752 exp.Is(this=expression.copy(), expression=exp.Null()), 5753 exp.Is(this=search.copy(), expression=exp.Null()), 5754 copy=False, 5755 ), 5756 copy=False, 5757 ) 5758 ifs.append(exp.If(this=cond, true=result)) 5759 5760 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5761 5762 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5763 self._match_text_seq("KEY") 5764 key = self._parse_column() 5765 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5766 self._match_text_seq("VALUE") 5767 value = self._parse_bitwise() 5768 5769 if not key and not value: 5770 return None 5771 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5772 5773 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5774 if not this or not self._match_text_seq("FORMAT", "JSON"): 5775 return this 5776 5777 return self.expression(exp.FormatJson, this=this) 5778 5779 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5780 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5781 for value in values: 5782 if self._match_text_seq(value, "ON", on): 5783 return f"{value} ON {on}" 5784 5785 return None 5786 5787 @t.overload 5788 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5789 5790 @t.overload 5791 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5792 5793 def _parse_json_object(self, agg=False): 5794 star = self._parse_star() 5795 expressions = ( 5796 [star] 5797 if star 5798 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5799 ) 5800 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5801 5802 unique_keys = None 5803 if self._match_text_seq("WITH", "UNIQUE"): 5804 unique_keys = True 5805 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5806 unique_keys = False 5807 5808 self._match_text_seq("KEYS") 5809 5810 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5811 self._parse_type() 5812 ) 5813 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5814 5815 return self.expression( 5816 exp.JSONObjectAgg if agg else exp.JSONObject, 5817 expressions=expressions, 5818 null_handling=null_handling, 5819 unique_keys=unique_keys, 5820 return_type=return_type, 5821 encoding=encoding, 5822 ) 5823 5824 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5825 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5826 if not self._match_text_seq("NESTED"): 5827 this = self._parse_id_var() 5828 kind = self._parse_types(allow_identifiers=False) 5829 nested = None 5830 else: 5831 this = None 5832 kind = None 5833 nested = True 5834 5835 path = self._match_text_seq("PATH") and self._parse_string() 5836 nested_schema = nested and self._parse_json_schema() 5837 5838 return self.expression( 5839 exp.JSONColumnDef, 5840 this=this, 5841 kind=kind, 5842 path=path, 5843 nested_schema=nested_schema, 5844 ) 5845 5846 def _parse_json_schema(self) -> exp.JSONSchema: 5847 self._match_text_seq("COLUMNS") 5848 return self.expression( 5849 exp.JSONSchema, 5850 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5851 ) 5852 5853 def _parse_json_table(self) -> exp.JSONTable: 5854 this = self._parse_format_json(self._parse_bitwise()) 5855 path = self._match(TokenType.COMMA) and self._parse_string() 5856 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5857 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5858 schema = self._parse_json_schema() 5859 5860 return exp.JSONTable( 5861 this=this, 5862 schema=schema, 5863 path=path, 5864 error_handling=error_handling, 5865 empty_handling=empty_handling, 5866 ) 5867 5868 def _parse_match_against(self) -> exp.MatchAgainst: 5869 expressions = self._parse_csv(self._parse_column) 5870 5871 self._match_text_seq(")", "AGAINST", "(") 5872 5873 this = self._parse_string() 5874 5875 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5876 modifier = "IN NATURAL LANGUAGE MODE" 5877 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5878 modifier = f"{modifier} WITH QUERY EXPANSION" 5879 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5880 modifier = "IN BOOLEAN MODE" 5881 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5882 modifier = "WITH QUERY EXPANSION" 5883 else: 5884 modifier = None 5885 5886 return self.expression( 5887 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5888 ) 5889 5890 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5891 def _parse_open_json(self) -> exp.OpenJSON: 5892 this = self._parse_bitwise() 5893 path = self._match(TokenType.COMMA) and self._parse_string() 5894 5895 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5896 this = self._parse_field(any_token=True) 5897 kind = self._parse_types() 5898 path = 
self._parse_string() 5899 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5900 5901 return self.expression( 5902 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5903 ) 5904 5905 expressions = None 5906 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5907 self._match_l_paren() 5908 expressions = self._parse_csv(_parse_open_json_column_def) 5909 5910 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5911 5912 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5913 args = self._parse_csv(self._parse_bitwise) 5914 5915 if self._match(TokenType.IN): 5916 return self.expression( 5917 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5918 ) 5919 5920 if haystack_first: 5921 haystack = seq_get(args, 0) 5922 needle = seq_get(args, 1) 5923 else: 5924 needle = seq_get(args, 0) 5925 haystack = seq_get(args, 1) 5926 5927 return self.expression( 5928 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5929 ) 5930 5931 def _parse_predict(self) -> exp.Predict: 5932 self._match_text_seq("MODEL") 5933 this = self._parse_table() 5934 5935 self._match(TokenType.COMMA) 5936 self._match_text_seq("TABLE") 5937 5938 return self.expression( 5939 exp.Predict, 5940 this=this, 5941 expression=self._parse_table(), 5942 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5943 ) 5944 5945 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5946 args = self._parse_csv(self._parse_table) 5947 return exp.JoinHint(this=func_name.upper(), expressions=args) 5948 5949 def _parse_substring(self) -> exp.Substring: 5950 # Postgres supports the form: substring(string [from int] [for int]) 5951 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5952 5953 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5954 5955 if self._match(TokenType.FROM): 5956 args.append(self._parse_bitwise()) 5957 if self._match(TokenType.FOR): 5958 if len(args) == 1: 5959 args.append(exp.Literal.number(1)) 5960 args.append(self._parse_bitwise()) 5961 5962 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5963 5964 def _parse_trim(self) -> exp.Trim: 5965 # https://www.w3resource.com/sql/character-functions/trim.php 5966 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5967 5968 position = None 5969 collation = None 5970 expression = None 5971 5972 if self._match_texts(self.TRIM_TYPES): 5973 position = self._prev.text.upper() 5974 5975 this = self._parse_bitwise() 5976 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5977 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5978 expression = self._parse_bitwise() 5979 5980 if invert_order: 5981 this, expression = expression, this 5982 5983 if self._match(TokenType.COLLATE): 5984 collation = self._parse_bitwise() 5985 5986 return self.expression( 5987 exp.Trim, this=this, position=position, expression=expression, collation=collation 5988 ) 5989 5990 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5991 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5992 5993 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5994 return self._parse_window(self._parse_id_var(), alias=True) 5995 5996 def _parse_respect_or_ignore_nulls( 5997 self, this: t.Optional[exp.Expression] 5998 ) -> t.Optional[exp.Expression]: 5999 if self._match_text_seq("IGNORE", "NULLS"): 
6000 return self.expression(exp.IgnoreNulls, this=this) 6001 if self._match_text_seq("RESPECT", "NULLS"): 6002 return self.expression(exp.RespectNulls, this=this) 6003 return this 6004 6005 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6006 if self._match(TokenType.HAVING): 6007 self._match_texts(("MAX", "MIN")) 6008 max = self._prev.text.upper() != "MIN" 6009 return self.expression( 6010 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6011 ) 6012 6013 return this 6014 6015 def _parse_window( 6016 self, this: t.Optional[exp.Expression], alias: bool = False 6017 ) -> t.Optional[exp.Expression]: 6018 func = this 6019 comments = func.comments if isinstance(func, exp.Expression) else None 6020 6021 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6022 self._match(TokenType.WHERE) 6023 this = self.expression( 6024 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6025 ) 6026 self._match_r_paren() 6027 6028 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6029 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6030 if self._match_text_seq("WITHIN", "GROUP"): 6031 order = self._parse_wrapped(self._parse_order) 6032 this = self.expression(exp.WithinGroup, this=this, expression=order) 6033 6034 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6035 # Some dialects choose to implement and some do not. 6036 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6037 6038 # There is some code above in _parse_lambda that handles 6039 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6040 6041 # The below changes handle 6042 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6043 6044 # Oracle allows both formats 6045 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6046 # and Snowflake chose to do the same for familiarity 6047 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6048 if isinstance(this, exp.AggFunc): 6049 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6050 6051 if ignore_respect and ignore_respect is not this: 6052 ignore_respect.replace(ignore_respect.this) 6053 this = self.expression(ignore_respect.__class__, this=this) 6054 6055 this = self._parse_respect_or_ignore_nulls(this) 6056 6057 # bigquery select from window x AS (partition by ...) 
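        # Illustrative sketch (not part of the original source): with alias=True,
        # the branch below parses named-window definitions such as the WINDOW
        # clause entry in
        #   SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY g ORDER BY o)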
6058 if alias: 6059 over = None 6060 self._match(TokenType.ALIAS) 6061 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6062 return this 6063 else: 6064 over = self._prev.text.upper() 6065 6066 if comments and isinstance(func, exp.Expression): 6067 func.pop_comments() 6068 6069 if not self._match(TokenType.L_PAREN): 6070 return self.expression( 6071 exp.Window, 6072 comments=comments, 6073 this=this, 6074 alias=self._parse_id_var(False), 6075 over=over, 6076 ) 6077 6078 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6079 6080 first = self._match(TokenType.FIRST) 6081 if self._match_text_seq("LAST"): 6082 first = False 6083 6084 partition, order = self._parse_partition_and_order() 6085 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6086 6087 if kind: 6088 self._match(TokenType.BETWEEN) 6089 start = self._parse_window_spec() 6090 self._match(TokenType.AND) 6091 end = self._parse_window_spec() 6092 6093 spec = self.expression( 6094 exp.WindowSpec, 6095 kind=kind, 6096 start=start["value"], 6097 start_side=start["side"], 6098 end=end["value"], 6099 end_side=end["side"], 6100 ) 6101 else: 6102 spec = None 6103 6104 self._match_r_paren() 6105 6106 window = self.expression( 6107 exp.Window, 6108 comments=comments, 6109 this=this, 6110 partition_by=partition, 6111 order=order, 6112 spec=spec, 6113 alias=window_alias, 6114 over=over, 6115 first=first, 6116 ) 6117 6118 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6119 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6120 return self._parse_window(window, alias=alias) 6121 6122 return window 6123 6124 def _parse_partition_and_order( 6125 self, 6126 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6127 return self._parse_partition_by(), self._parse_order() 6128 6129 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6130 self._match(TokenType.BETWEEN) 6131 6132 return { 6133 "value": ( 6134 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6135 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6136 or self._parse_bitwise() 6137 ), 6138 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6139 } 6140 6141 def _parse_alias( 6142 self, this: t.Optional[exp.Expression], explicit: bool = False 6143 ) -> t.Optional[exp.Expression]: 6144 any_token = self._match(TokenType.ALIAS) 6145 comments = self._prev_comments or [] 6146 6147 if explicit and not any_token: 6148 return this 6149 6150 if self._match(TokenType.L_PAREN): 6151 aliases = self.expression( 6152 exp.Aliases, 6153 comments=comments, 6154 this=this, 6155 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6156 ) 6157 self._match_r_paren(aliases) 6158 return aliases 6159 6160 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6161 self.STRING_ALIASES and self._parse_string_as_identifier() 6162 ) 6163 6164 if alias: 6165 comments.extend(alias.pop_comments()) 6166 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6167 column = this.this 6168 6169 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6170 if not this.comments and column and column.comments: 6171 this.comments = column.pop_comments() 6172 6173 return this 6174 6175 def _parse_id_var( 6176 self, 6177 any_token: bool = True, 6178 tokens: t.Optional[t.Collection[TokenType]] = None, 6179 ) -> t.Optional[exp.Expression]: 6180 expression = self._parse_identifier() 6181 if 
not expression and ( 6182 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6183 ): 6184 quoted = self._prev.token_type == TokenType.STRING 6185 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6186 6187 return expression 6188 6189 def _parse_string(self) -> t.Optional[exp.Expression]: 6190 if self._match_set(self.STRING_PARSERS): 6191 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6192 return self._parse_placeholder() 6193 6194 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6195 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6196 6197 def _parse_number(self) -> t.Optional[exp.Expression]: 6198 if self._match_set(self.NUMERIC_PARSERS): 6199 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6200 return self._parse_placeholder() 6201 6202 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6203 if self._match(TokenType.IDENTIFIER): 6204 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6205 return self._parse_placeholder() 6206 6207 def _parse_var( 6208 self, 6209 any_token: bool = False, 6210 tokens: t.Optional[t.Collection[TokenType]] = None, 6211 upper: bool = False, 6212 ) -> t.Optional[exp.Expression]: 6213 if ( 6214 (any_token and self._advance_any()) 6215 or self._match(TokenType.VAR) 6216 or (self._match_set(tokens) if tokens else False) 6217 ): 6218 return self.expression( 6219 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6220 ) 6221 return self._parse_placeholder() 6222 6223 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6224 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6225 self._advance() 6226 return self._prev 6227 return None 6228 6229 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6230 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6231 6232 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6233 return self._parse_primary() or self._parse_var(any_token=True) 6234 6235 def _parse_null(self) -> t.Optional[exp.Expression]: 6236 if self._match_set(self.NULL_TOKENS): 6237 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6238 return self._parse_placeholder() 6239 6240 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6241 if self._match(TokenType.TRUE): 6242 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6243 if self._match(TokenType.FALSE): 6244 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6245 return self._parse_placeholder() 6246 6247 def _parse_star(self) -> t.Optional[exp.Expression]: 6248 if self._match(TokenType.STAR): 6249 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6250 return self._parse_placeholder() 6251 6252 def _parse_parameter(self) -> exp.Parameter: 6253 this = self._parse_identifier() or self._parse_primary_or_var() 6254 return self.expression(exp.Parameter, this=this) 6255 6256 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6257 if self._match_set(self.PLACEHOLDER_PARSERS): 6258 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6259 if placeholder: 6260 return placeholder 6261 self._advance(-1) 6262 return None 6263 6264 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6265 if not self._match_texts(keywords): 6266 return None 6267 if self._match(TokenType.L_PAREN, 
advance=False): 6268 return self._parse_wrapped_csv(self._parse_expression) 6269 6270 expression = self._parse_expression() 6271 return [expression] if expression else None 6272 6273 def _parse_csv( 6274 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6275 ) -> t.List[exp.Expression]: 6276 parse_result = parse_method() 6277 items = [parse_result] if parse_result is not None else [] 6278 6279 while self._match(sep): 6280 self._add_comments(parse_result) 6281 parse_result = parse_method() 6282 if parse_result is not None: 6283 items.append(parse_result) 6284 6285 return items 6286 6287 def _parse_tokens( 6288 self, parse_method: t.Callable, expressions: t.Dict 6289 ) -> t.Optional[exp.Expression]: 6290 this = parse_method() 6291 6292 while self._match_set(expressions): 6293 this = self.expression( 6294 expressions[self._prev.token_type], 6295 this=this, 6296 comments=self._prev_comments, 6297 expression=parse_method(), 6298 ) 6299 6300 return this 6301 6302 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6303 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6304 6305 def _parse_wrapped_csv( 6306 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6307 ) -> t.List[exp.Expression]: 6308 return self._parse_wrapped( 6309 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6310 ) 6311 6312 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6313 wrapped = self._match(TokenType.L_PAREN) 6314 if not wrapped and not optional: 6315 self.raise_error("Expecting (") 6316 parse_result = parse_method() 6317 if wrapped: 6318 self._match_r_paren() 6319 return parse_result 6320 6321 def _parse_expressions(self) -> t.List[exp.Expression]: 6322 return self._parse_csv(self._parse_expression) 6323 6324 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6325 return self._parse_select() or self._parse_set_operations( 6326 self._parse_expression() if alias else self._parse_assignment() 6327 ) 6328 6329 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6330 return self._parse_query_modifiers( 6331 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6332 ) 6333 6334 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6335 this = None 6336 if self._match_texts(self.TRANSACTION_KIND): 6337 this = self._prev.text 6338 6339 self._match_texts(("TRANSACTION", "WORK")) 6340 6341 modes = [] 6342 while True: 6343 mode = [] 6344 while self._match(TokenType.VAR): 6345 mode.append(self._prev.text) 6346 6347 if mode: 6348 modes.append(" ".join(mode)) 6349 if not self._match(TokenType.COMMA): 6350 break 6351 6352 return self.expression(exp.Transaction, this=this, modes=modes) 6353 6354 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6355 chain = None 6356 savepoint = None 6357 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6358 6359 self._match_texts(("TRANSACTION", "WORK")) 6360 6361 if self._match_text_seq("TO"): 6362 self._match_text_seq("SAVEPOINT") 6363 savepoint = self._parse_id_var() 6364 6365 if self._match(TokenType.AND): 6366 chain = not self._match_text_seq("NO") 6367 self._match_text_seq("CHAIN") 6368 6369 if is_rollback: 6370 return self.expression(exp.Rollback, savepoint=savepoint) 6371 6372 return self.expression(exp.Commit, chain=chain) 6373 6374 def _parse_refresh(self) -> exp.Refresh: 6375 self._match(TokenType.TABLE) 6376 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6377 6378 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6379 if not self._match_text_seq("ADD"): 6380 return None 6381 6382 self._match(TokenType.COLUMN) 6383 exists_column = self._parse_exists(not_=True) 6384 expression = self._parse_field_def() 6385 6386 if expression: 6387 expression.set("exists", exists_column) 6388 6389 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6390 if self._match_texts(("FIRST", "AFTER")): 6391 position = self._prev.text 6392 column_position = self.expression( 6393 exp.ColumnPosition, this=self._parse_column(), position=position 6394 ) 6395 expression.set("position", column_position) 6396 6397 return expression 6398 6399 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6400 drop = self._match(TokenType.DROP) and self._parse_drop() 6401 if drop and not isinstance(drop, exp.Command): 6402 drop.set("kind", drop.args.get("kind", "COLUMN")) 6403 return drop 6404 6405 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6406 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6407 return self.expression( 6408 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6409 ) 6410 6411 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6412 index = self._index - 1 6413 6414 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6415 return self._parse_csv( 6416 lambda: self.expression( 6417 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6418 ) 6419 ) 6420 6421 self._retreat(index) 6422 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6423 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6424 6425 if self._match_text_seq("ADD", "COLUMNS"): 6426 schema = self._parse_schema() 6427 if schema: 6428 return [schema] 6429 return [] 6430 6431 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6432 6433 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6434 if self._match_texts(self.ALTER_ALTER_PARSERS): 6435 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6436 6437 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6438 # keyword after ALTER we default to parsing this statement 6439 self._match(TokenType.COLUMN) 6440 column = self._parse_field(any_token=True) 6441 6442 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6443 return self.expression(exp.AlterColumn, this=column, drop=True) 6444 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6445 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6446 if self._match(TokenType.COMMENT): 6447 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6448 if self._match_text_seq("DROP", "NOT", "NULL"): 6449 return self.expression( 6450 exp.AlterColumn, 6451 this=column, 6452 drop=True, 6453 allow_null=True, 6454 ) 6455 if self._match_text_seq("SET", "NOT", "NULL"): 6456 return self.expression( 6457 exp.AlterColumn, 6458 this=column, 6459 allow_null=False, 6460 ) 6461 self._match_text_seq("SET", "DATA") 6462 self._match_text_seq("TYPE") 6463 return self.expression( 6464 exp.AlterColumn, 6465 this=column, 6466 dtype=self._parse_types(), 6467 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6468 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6469 ) 6470 6471 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6472 if self._match_texts(("ALL", "EVEN", "AUTO")): 6473 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6474 6475 self._match_text_seq("KEY", "DISTKEY") 6476 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6477 6478 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6479 if compound: 6480 self._match_text_seq("SORTKEY") 6481 6482 if self._match(TokenType.L_PAREN, advance=False): 6483 return self.expression( 6484 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6485 ) 6486 6487 self._match_texts(("AUTO", "NONE")) 6488 return self.expression( 6489 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6490 ) 6491 6492 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6493 index = self._index - 1 6494 6495 partition_exists = self._parse_exists() 6496 if self._match(TokenType.PARTITION, advance=False): 6497 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6498 6499 self._retreat(index) 6500 return self._parse_csv(self._parse_drop_column) 6501 6502 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6503 if self._match(TokenType.COLUMN): 6504 exists = self._parse_exists() 6505 old_column = self._parse_column() 6506 to = self._match_text_seq("TO") 6507 new_column = self._parse_column() 6508 6509 if old_column is None or to is None or new_column is None: 6510 return None 6511 6512 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6513 6514 self._match_text_seq("TO") 6515 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6516 6517 def _parse_alter_table_set(self) -> exp.AlterSet: 6518 alter_set = self.expression(exp.AlterSet) 6519 6520 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6521 "TABLE", "PROPERTIES" 6522 ): 6523 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6524 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6525 alter_set.set("expressions", [self._parse_assignment()]) 6526 elif self._match_texts(("LOGGED", "UNLOGGED")): 6527 alter_set.set("option", exp.var(self._prev.text.upper())) 6528 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6529 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6530 elif self._match_text_seq("LOCATION"): 6531 alter_set.set("location", self._parse_field()) 6532 elif self._match_text_seq("ACCESS", "METHOD"): 6533 alter_set.set("access_method", self._parse_field()) 6534 elif self._match_text_seq("TABLESPACE"): 6535 alter_set.set("tablespace", self._parse_field()) 6536 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6537 alter_set.set("file_format", [self._parse_field()]) 6538 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6539 alter_set.set("file_format", self._parse_wrapped_options()) 6540 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6541 alter_set.set("copy_options", self._parse_wrapped_options()) 6542 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6543 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6544 else: 6545 if self._match_text_seq("SERDE"): 6546 alter_set.set("serde", self._parse_field()) 6547 6548 alter_set.set("expressions", [self._parse_properties()]) 6549 6550 return 
alter_set 6551 6552 def _parse_alter(self) -> exp.Alter | exp.Command: 6553 start = self._prev 6554 6555 alter_token = self._match_set(self.ALTERABLES) and self._prev 6556 if not alter_token: 6557 return self._parse_as_command(start) 6558 6559 exists = self._parse_exists() 6560 only = self._match_text_seq("ONLY") 6561 this = self._parse_table(schema=True) 6562 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6563 6564 if self._next: 6565 self._advance() 6566 6567 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6568 if parser: 6569 actions = ensure_list(parser(self)) 6570 options = self._parse_csv(self._parse_property) 6571 6572 if not self._curr and actions: 6573 return self.expression( 6574 exp.Alter, 6575 this=this, 6576 kind=alter_token.text.upper(), 6577 exists=exists, 6578 actions=actions, 6579 only=only, 6580 options=options, 6581 cluster=cluster, 6582 ) 6583 6584 return self._parse_as_command(start) 6585 6586 def _parse_merge(self) -> exp.Merge: 6587 self._match(TokenType.INTO) 6588 target = self._parse_table() 6589 6590 if target and self._match(TokenType.ALIAS, advance=False): 6591 target.set("alias", self._parse_table_alias()) 6592 6593 self._match(TokenType.USING) 6594 using = self._parse_table() 6595 6596 self._match(TokenType.ON) 6597 on = self._parse_assignment() 6598 6599 return self.expression( 6600 exp.Merge, 6601 this=target, 6602 using=using, 6603 on=on, 6604 expressions=self._parse_when_matched(), 6605 ) 6606 6607 def _parse_when_matched(self) -> t.List[exp.When]: 6608 whens = [] 6609 6610 while self._match(TokenType.WHEN): 6611 matched = not self._match(TokenType.NOT) 6612 self._match_text_seq("MATCHED") 6613 source = ( 6614 False 6615 if self._match_text_seq("BY", "TARGET") 6616 else self._match_text_seq("BY", "SOURCE") 6617 ) 6618 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6619 6620 self._match(TokenType.THEN) 6621 6622 if self._match(TokenType.INSERT): 6623 _this = self._parse_star() 6624 if _this: 6625 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6626 else: 6627 then = self.expression( 6628 exp.Insert, 6629 this=self._parse_value(), 6630 expression=self._match_text_seq("VALUES") and self._parse_value(), 6631 ) 6632 elif self._match(TokenType.UPDATE): 6633 expressions = self._parse_star() 6634 if expressions: 6635 then = self.expression(exp.Update, expressions=expressions) 6636 else: 6637 then = self.expression( 6638 exp.Update, 6639 expressions=self._match(TokenType.SET) 6640 and self._parse_csv(self._parse_equality), 6641 ) 6642 elif self._match(TokenType.DELETE): 6643 then = self.expression(exp.Var, this=self._prev.text) 6644 else: 6645 then = None 6646 6647 whens.append( 6648 self.expression( 6649 exp.When, 6650 matched=matched, 6651 source=source, 6652 condition=condition, 6653 then=then, 6654 ) 6655 ) 6656 return whens 6657 6658 def _parse_show(self) -> t.Optional[exp.Expression]: 6659 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6660 if parser: 6661 return parser(self) 6662 return self._parse_as_command(self._prev) 6663 6664 def _parse_set_item_assignment( 6665 self, kind: t.Optional[str] = None 6666 ) -> t.Optional[exp.Expression]: 6667 index = self._index 6668 6669 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6670 return self._parse_set_transaction(global_=kind == "GLOBAL") 6671 6672 left = self._parse_primary() or self._parse_column() 6673 assignment_delimiter = self._match_texts(("=", 
"TO")) 6674 6675 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6676 self._retreat(index) 6677 return None 6678 6679 right = self._parse_statement() or self._parse_id_var() 6680 if isinstance(right, (exp.Column, exp.Identifier)): 6681 right = exp.var(right.name) 6682 6683 this = self.expression(exp.EQ, this=left, expression=right) 6684 return self.expression(exp.SetItem, this=this, kind=kind) 6685 6686 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6687 self._match_text_seq("TRANSACTION") 6688 characteristics = self._parse_csv( 6689 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6690 ) 6691 return self.expression( 6692 exp.SetItem, 6693 expressions=characteristics, 6694 kind="TRANSACTION", 6695 **{"global": global_}, # type: ignore 6696 ) 6697 6698 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6699 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6700 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6701 6702 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6703 index = self._index 6704 set_ = self.expression( 6705 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6706 ) 6707 6708 if self._curr: 6709 self._retreat(index) 6710 return self._parse_as_command(self._prev) 6711 6712 return set_ 6713 6714 def _parse_var_from_options( 6715 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6716 ) -> t.Optional[exp.Var]: 6717 start = self._curr 6718 if not start: 6719 return None 6720 6721 option = start.text.upper() 6722 continuations = options.get(option) 6723 6724 index = self._index 6725 self._advance() 6726 for keywords in continuations or []: 6727 if isinstance(keywords, str): 6728 keywords = (keywords,) 6729 6730 if self._match_text_seq(*keywords): 6731 option = f"{option} {' '.join(keywords)}" 6732 break 6733 else: 6734 if continuations or continuations is None: 6735 if raise_unmatched: 6736 self.raise_error(f"Unknown option {option}") 6737 6738 self._retreat(index) 6739 return None 6740 6741 return exp.var(option) 6742 6743 def _parse_as_command(self, start: Token) -> exp.Command: 6744 while self._curr: 6745 self._advance() 6746 text = self._find_sql(start, self._prev) 6747 size = len(start.text) 6748 self._warn_unsupported() 6749 return exp.Command(this=text[:size], expression=text[size:]) 6750 6751 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6752 settings = [] 6753 6754 self._match_l_paren() 6755 kind = self._parse_id_var() 6756 6757 if self._match(TokenType.L_PAREN): 6758 while True: 6759 key = self._parse_id_var() 6760 value = self._parse_primary() 6761 6762 if not key and value is None: 6763 break 6764 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6765 self._match(TokenType.R_PAREN) 6766 6767 self._match_r_paren() 6768 6769 return self.expression( 6770 exp.DictProperty, 6771 this=this, 6772 kind=kind.this if kind else None, 6773 settings=settings, 6774 ) 6775 6776 def _parse_dict_range(self, this: str) -> exp.DictRange: 6777 self._match_l_paren() 6778 has_min = self._match_text_seq("MIN") 6779 if has_min: 6780 min = self._parse_var() or self._parse_primary() 6781 self._match_text_seq("MAX") 6782 max = self._parse_var() or self._parse_primary() 6783 else: 6784 max = self._parse_var() or self._parse_primary() 6785 min = exp.Literal.number(0) 6786 self._match_r_paren() 6787 return self.expression(exp.DictRange, this=this, 
min=min, max=max) 6788 6789 def _parse_comprehension( 6790 self, this: t.Optional[exp.Expression] 6791 ) -> t.Optional[exp.Comprehension]: 6792 index = self._index 6793 expression = self._parse_column() 6794 if not self._match(TokenType.IN): 6795 self._retreat(index - 1) 6796 return None 6797 iterator = self._parse_column() 6798 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6799 return self.expression( 6800 exp.Comprehension, 6801 this=this, 6802 expression=expression, 6803 iterator=iterator, 6804 condition=condition, 6805 ) 6806 6807 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6808 if self._match(TokenType.HEREDOC_STRING): 6809 return self.expression(exp.Heredoc, this=self._prev.text) 6810 6811 if not self._match_text_seq("$"): 6812 return None 6813 6814 tags = ["$"] 6815 tag_text = None 6816 6817 if self._is_connected(): 6818 self._advance() 6819 tags.append(self._prev.text.upper()) 6820 else: 6821 self.raise_error("No closing $ found") 6822 6823 if tags[-1] != "$": 6824 if self._is_connected() and self._match_text_seq("$"): 6825 tag_text = tags[-1] 6826 tags.append("$") 6827 else: 6828 self.raise_error("No closing $ found") 6829 6830 heredoc_start = self._curr 6831 6832 while self._curr: 6833 if self._match_text_seq(*tags, advance=False): 6834 this = self._find_sql(heredoc_start, self._prev) 6835 self._advance(len(tags)) 6836 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6837 6838 self._advance() 6839 6840 self.raise_error(f"No closing {''.join(tags)} found") 6841 return None 6842 6843 def _find_parser( 6844 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6845 ) -> t.Optional[t.Callable]: 6846 if not self._curr: 6847 return None 6848 6849 index = self._index 6850 this = [] 6851 while True: 6852 # The current token might be multiple words 6853 curr = self._curr.text.upper() 6854 key = curr.split(" ") 6855 this.append(curr) 6856 6857 self._advance() 6858 result, trie = in_trie(trie, key) 6859 if result == TrieResult.FAILED: 6860 break 6861 6862 if result == TrieResult.EXISTS: 6863 subparser = parsers[" ".join(this)] 6864 return subparser 6865 6866 self._retreat(index) 6867 return None 6868 6869 def _match(self, token_type, advance=True, expression=None): 6870 if not self._curr: 6871 return None 6872 6873 if self._curr.token_type == token_type: 6874 if advance: 6875 self._advance() 6876 self._add_comments(expression) 6877 return True 6878 6879 return None 6880 6881 def _match_set(self, types, advance=True): 6882 if not self._curr: 6883 return None 6884 6885 if self._curr.token_type in types: 6886 if advance: 6887 self._advance() 6888 return True 6889 6890 return None 6891 6892 def _match_pair(self, token_type_a, token_type_b, advance=True): 6893 if not self._curr or not self._next: 6894 return None 6895 6896 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6897 if advance: 6898 self._advance(2) 6899 return True 6900 6901 return None 6902 6903 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6904 if not self._match(TokenType.L_PAREN, expression=expression): 6905 self.raise_error("Expecting (") 6906 6907 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6908 if not self._match(TokenType.R_PAREN, expression=expression): 6909 self.raise_error("Expecting )") 6910 6911 def _match_texts(self, texts, advance=True): 6912 if self._curr and self._curr.text.upper() in texts: 6913 if advance: 6914 self._advance() 6915 return True 6916 return None 
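    # Illustrative note (not part of the original source): _match_texts (above)
    # consumes a single token whose text is any one of the given strings, whereas
    # _match_text_seq (below) consumes one token per string, in order, retreating
    # to the starting index if any of them fails to match:
    #   self._match_texts(("MIN", "MAX"))      # matches MIN or MAX (one token)
    #   self._match_text_seq("NO", "ACTION")   # matches NO ACTION (two tokens)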
def _match_text_seq(self, *texts, advance=True):
    index = self._index
    for text in texts:
        if self._curr and self._curr.text.upper() == text:
            self._advance()
        else:
            self._retreat(index)
            return None

    if not advance:
        self._retreat(index)

    return True

def _replace_lambda(
    self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
) -> t.Optional[exp.Expression]:
    if not node:
        return node

    lambda_types = {e.name: e.args.get("to") or False for e in expressions}

    for column in node.find_all(exp.Column):
        typ = lambda_types.get(column.parts[0].name)
        if typ is not None:
            dot_or_id = column.to_dot() if column.table else column.this

            if typ:
                dot_or_id = self.expression(
                    exp.Cast,
                    this=dot_or_id,
                    to=typ,
                )

            parent = column.parent

            while isinstance(parent, exp.Dot):
                if not isinstance(parent.parent, exp.Dot):
                    parent.replace(dot_or_id)
                    break
                parent = parent.parent
            else:
                if column is node:
                    node = dot_or_id
                else:
                    column.replace(dot_or_id)
    return node

def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
    start = self._prev

    # Not to be confused with TRUNCATE(number, decimals) function call
    if self._match(TokenType.L_PAREN):
        self._retreat(self._index - 2)
        return self._parse_function()

    # Clickhouse supports TRUNCATE DATABASE as well
    is_database = self._match(TokenType.DATABASE)

    self._match(TokenType.TABLE)

    exists = self._parse_exists(not_=False)

    expressions = self._parse_csv(
        lambda: self._parse_table(schema=True, is_db_reference=is_database)
    )

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    if self._match_text_seq("RESTART", "IDENTITY"):
        identity = "RESTART"
    elif self._match_text_seq("CONTINUE", "IDENTITY"):
        identity = "CONTINUE"
    else:
        identity = None

    if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
        option = self._prev.text
    else:
        option = None

    partition = self._parse_partition()

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.TruncateTable,
        expressions=expressions,
        is_database=is_database,
        exists=exists,
        cluster=cluster,
        identity=identity,
        option=option,
        partition=partition,
    )

def _parse_with_operator(self) -> t.Optional[exp.Expression]:
    this = self._parse_ordered(self._parse_opclass)

    if not self._match(TokenType.WITH):
        return this

    op = self._parse_var(any_token=True)

    return self.expression(exp.WithOperator, this=this, op=op)

def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
    self._match(TokenType.EQ)
    self._match(TokenType.L_PAREN)

    opts: t.List[t.Optional[exp.Expression]] = []
    while self._curr and not self._match(TokenType.R_PAREN):
        if self._match_text_seq("FORMAT_NAME", "="):
            # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
            # so we parse it separately to use _parse_field()
            prop = self.expression(
                exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
            )
            opts.append(prop)
        else:
            opts.append(self._parse_property())

        self._match(TokenType.COMMA)

    return opts

def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
    sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

    options = []
    while self._curr and not self._match(TokenType.R_PAREN, advance=False):
        option = self._parse_var(any_token=True)
        prev = self._prev.text.upper()

        # Different dialects might separate options and values by white space, "=" and "AS"
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        param = self.expression(exp.CopyParameter, this=option)

        if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
            TokenType.L_PAREN, advance=False
        ):
            # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
            param.set("expressions", self._parse_wrapped_options())
        elif prev == "FILE_FORMAT":
            # T-SQL's external file format case
            param.set("expression", self._parse_field())
        else:
            param.set("expression", self._parse_unquoted_field())

        options.append(param)
        self._match(sep)

    return options

def _parse_credentials(self) -> t.Optional[exp.Credentials]:
    expr = self.expression(exp.Credentials)

    if self._match_text_seq("STORAGE_INTEGRATION", "="):
        expr.set("storage", self._parse_field())
    if self._match_text_seq("CREDENTIALS"):
        # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
        creds = (
            self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
        )
        expr.set("credentials", creds)
    if self._match_text_seq("ENCRYPTION"):
        expr.set("encryption", self._parse_wrapped_options())
    if self._match_text_seq("IAM_ROLE"):
        expr.set("iam_role", self._parse_field())
    if self._match_text_seq("REGION"):
        expr.set("region", self._parse_field())

    return expr

def _parse_file_location(self) -> t.Optional[exp.Expression]:
    return self._parse_field()

def _parse_copy(self) -> exp.Copy | exp.Command:
    start = self._prev

    self._match(TokenType.INTO)

    this = (
        self._parse_select(nested=True, parse_subquery_alias=False)
        if self._match(TokenType.L_PAREN, advance=False)
        else self._parse_table(schema=True)
    )

    kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

    files = self._parse_csv(self._parse_file_location)
    credentials = self._parse_credentials()

    self._match_text_seq("WITH")

    params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.Copy,
        this=this,
        kind=kind,
        credentials=credentials,
        files=files,
        params=params,
    )
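As a hedged illustration of how the TRUNCATE machinery above surfaces through sqlglot's public API (exact node shapes can vary between sqlglot versions):

import sqlglot
from sqlglot import exp

# _parse_truncate_table builds an exp.TruncateTable node; RESTART/CONTINUE
# IDENTITY and CASCADE/RESTRICT are captured as plain strings in its args.
tree = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE")
assert isinstance(tree, exp.TruncateTable)
print(tree.args.get("identity"))  # 'RESTART'
print(tree.args.get("option"))    # 'CASCADE'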
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
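A minimal construction sketch based on the signature above:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to 5 errors and raise them together instead of failing fast;
# the dialect can be given as a string, which Dialect.get_or_raise resolves.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")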
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
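For example, a sketch of multi-statement parsing, assuming tokens produced by the standalone Tokenizer from sqlglot.tokens (dialects normally wire the tokenizer and parser together for you):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
tokens = Tokenizer().tokenize(sql)
trees = Parser().parse(tokens, sql=sql)
print(len(trees))  # 2 -- one syntax tree per statement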
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
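A sketch of the single-type case, assuming exp.Condition is one of the keys registered in EXPRESSION_PARSERS:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

tokens = Tokenizer().tokenize("x > 5")
# With a collection of types, each is tried in order and the first
# successful parse wins; a single type is parsed directly.
trees = Parser().parse_into(exp.Condition, tokens, sql="x > 5")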
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
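check_errors is invoked internally once parsing finishes, so direct calls are rarely needed. A sketch of the WARN behaviour, assuming the deliberately malformed statement below actually produces a parse error:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize("SELECT 1 +"), sql="SELECT 1 +")
# With WARN, errors were logged rather than raised, but they remain
# inspectable on the parser instance.
for error in parser.errors:
    print(error)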
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
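Because raise_error records the surrounding context, a caller can recover the location and the highlighted snippet from a raised ParseError. A sketch, assuming the statement below fails to parse under the default IMMEDIATE level:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

try:
    Parser().parse(Tokenizer().tokenize("SELECT 1 +"), sql="SELECT 1 +")
except ParseError as e:
    info = e.errors[0]  # the structured fields set by ParseError.new
    print(info["line"], info["col"], info["highlight"])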
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
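Dialect parsers call this helper when building nodes so that buffered comments are attached and validation runs in one place. A hypothetical sketch (the method name _parse_example is made up for illustration):

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_example(self) -> exp.Expression:
        # Builds a validated exp.Anonymous node; expression() also attaches
        # any comments the parser has buffered to the new instance.
        return self.expression(exp.Anonymous, this="MY_FUNC", expressions=[])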
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
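A sketch of validation catching a node with a missing mandatory argument, assuming exp.Cast requires its "to" argument (as it does in current sqlglot versions):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

cast = exp.Cast(this=exp.column("x"))  # no target type set
try:
    Parser().validate_expression(cast)  # default IMMEDIATE level raises here
except ParseError as e:
    print(e)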