sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 
62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111def build_pad(args: t.List, is_left: bool = True): 112 return exp.Pad( 113 this=seq_get(args, 0), 114 expression=seq_get(args, 1), 115 fill_pattern=seq_get(args, 2), 116 is_left=is_left, 117 ) 118 119 120def build_array_constructor( 121 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 122) -> exp.Expression: 123 array_exp = exp_class(expressions=args) 124 125 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 126 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 127 128 return array_exp 129 130 131def build_convert_timezone( 132 args: t.List, default_source_tz: t.Optional[str] = None 133) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 134 if len(args) == 2: 135 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 136 return exp.ConvertTimezone( 137 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 138 ) 139 140 return exp.ConvertTimezone.from_arg_list(args) 141 142 143def build_trim(args: t.List, is_left: bool = True): 144 return exp.Trim( 145 this=seq_get(args, 0), 146 expression=seq_get(args, 1), 147 position="LEADING" if is_left else "TRAILING", 148 ) 149 150 151def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce: 152 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl) 153 154 155class _Parser(type): 156 def __new__(cls, clsname, bases, attrs): 157 klass = super().__new__(cls, clsname, bases, attrs) 158 159 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 160 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 161 162 return klass 163 164 165class Parser(metaclass=_Parser): 
166 """ 167 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 168 169 Args: 170 error_level: The desired error level. 171 Default: ErrorLevel.IMMEDIATE 172 error_message_context: The amount of context to capture from a query string when displaying 173 the error message (in number of characters). 174 Default: 100 175 max_errors: Maximum number of error messages to include in a raised ParseError. 176 This is only relevant if error_level is ErrorLevel.RAISE. 177 Default: 3 178 """ 179 180 FUNCTIONS: t.Dict[str, t.Callable] = { 181 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 182 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 183 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 184 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 185 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 186 ), 187 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 188 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 189 ), 190 "CHAR": lambda args: exp.Chr(expressions=args), 191 "CHR": lambda args: exp.Chr(expressions=args), 192 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 193 "CONCAT": lambda args, dialect: exp.Concat( 194 expressions=args, 195 safe=not dialect.STRICT_STRING_CONCAT, 196 coalesce=dialect.CONCAT_COALESCE, 197 ), 198 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 199 expressions=args, 200 safe=not dialect.STRICT_STRING_CONCAT, 201 coalesce=dialect.CONCAT_COALESCE, 202 ), 203 "CONVERT_TIMEZONE": build_convert_timezone, 204 "DATE_TO_DATE_STR": lambda args: exp.Cast( 205 this=seq_get(args, 0), 206 to=exp.DataType(this=exp.DataType.Type.TEXT), 207 ), 208 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 209 start=seq_get(args, 0), 210 end=seq_get(args, 1), 211 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 212 
), 213 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 214 "HEX": build_hex, 215 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 216 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 217 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 218 "LIKE": build_like, 219 "LOG": build_logarithm, 220 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 221 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 222 "LOWER": build_lower, 223 "LPAD": lambda args: build_pad(args), 224 "LEFTPAD": lambda args: build_pad(args), 225 "LTRIM": lambda args: build_trim(args), 226 "MOD": build_mod, 227 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 228 "RPAD": lambda args: build_pad(args, is_left=False), 229 "RTRIM": lambda args: build_trim(args, is_left=False), 230 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 231 if len(args) != 2 232 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 233 "TIME_TO_TIME_STR": lambda args: exp.Cast( 234 this=seq_get(args, 0), 235 to=exp.DataType(this=exp.DataType.Type.TEXT), 236 ), 237 "TO_HEX": build_hex, 238 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 239 this=exp.Cast( 240 this=seq_get(args, 0), 241 to=exp.DataType(this=exp.DataType.Type.TEXT), 242 ), 243 start=exp.Literal.number(1), 244 length=exp.Literal.number(10), 245 ), 246 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 247 "UPPER": build_upper, 248 "VAR_MAP": build_var_map, 249 } 250 251 NO_PAREN_FUNCTIONS = { 252 TokenType.CURRENT_DATE: exp.CurrentDate, 253 TokenType.CURRENT_DATETIME: exp.CurrentDate, 254 TokenType.CURRENT_TIME: exp.CurrentTime, 255 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 256 TokenType.CURRENT_USER: exp.CurrentUser, 257 } 258 259 STRUCT_TYPE_TOKENS = { 260 TokenType.NESTED, 261 
TokenType.OBJECT, 262 TokenType.STRUCT, 263 TokenType.UNION, 264 } 265 266 NESTED_TYPE_TOKENS = { 267 TokenType.ARRAY, 268 TokenType.LIST, 269 TokenType.LOWCARDINALITY, 270 TokenType.MAP, 271 TokenType.NULLABLE, 272 *STRUCT_TYPE_TOKENS, 273 } 274 275 ENUM_TYPE_TOKENS = { 276 TokenType.ENUM, 277 TokenType.ENUM8, 278 TokenType.ENUM16, 279 } 280 281 AGGREGATE_TYPE_TOKENS = { 282 TokenType.AGGREGATEFUNCTION, 283 TokenType.SIMPLEAGGREGATEFUNCTION, 284 } 285 286 TYPE_TOKENS = { 287 TokenType.BIT, 288 TokenType.BOOLEAN, 289 TokenType.TINYINT, 290 TokenType.UTINYINT, 291 TokenType.SMALLINT, 292 TokenType.USMALLINT, 293 TokenType.INT, 294 TokenType.UINT, 295 TokenType.BIGINT, 296 TokenType.UBIGINT, 297 TokenType.INT128, 298 TokenType.UINT128, 299 TokenType.INT256, 300 TokenType.UINT256, 301 TokenType.MEDIUMINT, 302 TokenType.UMEDIUMINT, 303 TokenType.FIXEDSTRING, 304 TokenType.FLOAT, 305 TokenType.DOUBLE, 306 TokenType.CHAR, 307 TokenType.NCHAR, 308 TokenType.VARCHAR, 309 TokenType.NVARCHAR, 310 TokenType.BPCHAR, 311 TokenType.TEXT, 312 TokenType.MEDIUMTEXT, 313 TokenType.LONGTEXT, 314 TokenType.MEDIUMBLOB, 315 TokenType.LONGBLOB, 316 TokenType.BINARY, 317 TokenType.VARBINARY, 318 TokenType.JSON, 319 TokenType.JSONB, 320 TokenType.INTERVAL, 321 TokenType.TINYBLOB, 322 TokenType.TINYTEXT, 323 TokenType.TIME, 324 TokenType.TIMETZ, 325 TokenType.TIMESTAMP, 326 TokenType.TIMESTAMP_S, 327 TokenType.TIMESTAMP_MS, 328 TokenType.TIMESTAMP_NS, 329 TokenType.TIMESTAMPTZ, 330 TokenType.TIMESTAMPLTZ, 331 TokenType.TIMESTAMPNTZ, 332 TokenType.DATETIME, 333 TokenType.DATETIME64, 334 TokenType.DATE, 335 TokenType.DATE32, 336 TokenType.INT4RANGE, 337 TokenType.INT4MULTIRANGE, 338 TokenType.INT8RANGE, 339 TokenType.INT8MULTIRANGE, 340 TokenType.NUMRANGE, 341 TokenType.NUMMULTIRANGE, 342 TokenType.TSRANGE, 343 TokenType.TSMULTIRANGE, 344 TokenType.TSTZRANGE, 345 TokenType.TSTZMULTIRANGE, 346 TokenType.DATERANGE, 347 TokenType.DATEMULTIRANGE, 348 TokenType.DECIMAL, 349 TokenType.DECIMAL32, 
350 TokenType.DECIMAL64, 351 TokenType.DECIMAL128, 352 TokenType.UDECIMAL, 353 TokenType.BIGDECIMAL, 354 TokenType.UUID, 355 TokenType.GEOGRAPHY, 356 TokenType.GEOMETRY, 357 TokenType.HLLSKETCH, 358 TokenType.HSTORE, 359 TokenType.PSEUDO_TYPE, 360 TokenType.SUPER, 361 TokenType.SERIAL, 362 TokenType.SMALLSERIAL, 363 TokenType.BIGSERIAL, 364 TokenType.XML, 365 TokenType.YEAR, 366 TokenType.UNIQUEIDENTIFIER, 367 TokenType.USERDEFINED, 368 TokenType.MONEY, 369 TokenType.SMALLMONEY, 370 TokenType.ROWVERSION, 371 TokenType.IMAGE, 372 TokenType.VARIANT, 373 TokenType.VECTOR, 374 TokenType.OBJECT, 375 TokenType.OBJECT_IDENTIFIER, 376 TokenType.INET, 377 TokenType.IPADDRESS, 378 TokenType.IPPREFIX, 379 TokenType.IPV4, 380 TokenType.IPV6, 381 TokenType.UNKNOWN, 382 TokenType.NULL, 383 TokenType.NAME, 384 TokenType.TDIGEST, 385 *ENUM_TYPE_TOKENS, 386 *NESTED_TYPE_TOKENS, 387 *AGGREGATE_TYPE_TOKENS, 388 } 389 390 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 391 TokenType.BIGINT: TokenType.UBIGINT, 392 TokenType.INT: TokenType.UINT, 393 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 394 TokenType.SMALLINT: TokenType.USMALLINT, 395 TokenType.TINYINT: TokenType.UTINYINT, 396 TokenType.DECIMAL: TokenType.UDECIMAL, 397 } 398 399 SUBQUERY_PREDICATES = { 400 TokenType.ANY: exp.Any, 401 TokenType.ALL: exp.All, 402 TokenType.EXISTS: exp.Exists, 403 TokenType.SOME: exp.Any, 404 } 405 406 RESERVED_TOKENS = { 407 *Tokenizer.SINGLE_TOKENS.values(), 408 TokenType.SELECT, 409 } - {TokenType.IDENTIFIER} 410 411 DB_CREATABLES = { 412 TokenType.DATABASE, 413 TokenType.DICTIONARY, 414 TokenType.MODEL, 415 TokenType.SCHEMA, 416 TokenType.SEQUENCE, 417 TokenType.STORAGE_INTEGRATION, 418 TokenType.TABLE, 419 TokenType.TAG, 420 TokenType.VIEW, 421 TokenType.WAREHOUSE, 422 TokenType.STREAMLIT, 423 } 424 425 CREATABLES = { 426 TokenType.COLUMN, 427 TokenType.CONSTRAINT, 428 TokenType.FOREIGN_KEY, 429 TokenType.FUNCTION, 430 TokenType.INDEX, 431 TokenType.PROCEDURE, 432 *DB_CREATABLES, 433 } 434 435 ALTERABLES = { 
436 TokenType.INDEX, 437 TokenType.TABLE, 438 TokenType.VIEW, 439 } 440 441 # Tokens that can represent identifiers 442 ID_VAR_TOKENS = { 443 TokenType.ALL, 444 TokenType.VAR, 445 TokenType.ANTI, 446 TokenType.APPLY, 447 TokenType.ASC, 448 TokenType.ASOF, 449 TokenType.AUTO_INCREMENT, 450 TokenType.BEGIN, 451 TokenType.BPCHAR, 452 TokenType.CACHE, 453 TokenType.CASE, 454 TokenType.COLLATE, 455 TokenType.COMMAND, 456 TokenType.COMMENT, 457 TokenType.COMMIT, 458 TokenType.CONSTRAINT, 459 TokenType.COPY, 460 TokenType.CUBE, 461 TokenType.DEFAULT, 462 TokenType.DELETE, 463 TokenType.DESC, 464 TokenType.DESCRIBE, 465 TokenType.DICTIONARY, 466 TokenType.DIV, 467 TokenType.END, 468 TokenType.EXECUTE, 469 TokenType.ESCAPE, 470 TokenType.FALSE, 471 TokenType.FIRST, 472 TokenType.FILTER, 473 TokenType.FINAL, 474 TokenType.FORMAT, 475 TokenType.FULL, 476 TokenType.IDENTIFIER, 477 TokenType.IS, 478 TokenType.ISNULL, 479 TokenType.INTERVAL, 480 TokenType.KEEP, 481 TokenType.KILL, 482 TokenType.LEFT, 483 TokenType.LOAD, 484 TokenType.MERGE, 485 TokenType.NATURAL, 486 TokenType.NEXT, 487 TokenType.OFFSET, 488 TokenType.OPERATOR, 489 TokenType.ORDINALITY, 490 TokenType.OVERLAPS, 491 TokenType.OVERWRITE, 492 TokenType.PARTITION, 493 TokenType.PERCENT, 494 TokenType.PIVOT, 495 TokenType.PRAGMA, 496 TokenType.RANGE, 497 TokenType.RECURSIVE, 498 TokenType.REFERENCES, 499 TokenType.REFRESH, 500 TokenType.RENAME, 501 TokenType.REPLACE, 502 TokenType.RIGHT, 503 TokenType.ROLLUP, 504 TokenType.ROW, 505 TokenType.ROWS, 506 TokenType.SEMI, 507 TokenType.SET, 508 TokenType.SETTINGS, 509 TokenType.SHOW, 510 TokenType.TEMPORARY, 511 TokenType.TOP, 512 TokenType.TRUE, 513 TokenType.TRUNCATE, 514 TokenType.UNIQUE, 515 TokenType.UNNEST, 516 TokenType.UNPIVOT, 517 TokenType.UPDATE, 518 TokenType.USE, 519 TokenType.VOLATILE, 520 TokenType.WINDOW, 521 *CREATABLES, 522 *SUBQUERY_PREDICATES, 523 *TYPE_TOKENS, 524 *NO_PAREN_FUNCTIONS, 525 } 526 ID_VAR_TOKENS.remove(TokenType.UNION) 527 528 
INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 529 530 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 531 TokenType.ANTI, 532 TokenType.APPLY, 533 TokenType.ASOF, 534 TokenType.FULL, 535 TokenType.LEFT, 536 TokenType.LOCK, 537 TokenType.NATURAL, 538 TokenType.OFFSET, 539 TokenType.RIGHT, 540 TokenType.SEMI, 541 TokenType.WINDOW, 542 } 543 544 ALIAS_TOKENS = ID_VAR_TOKENS 545 546 ARRAY_CONSTRUCTORS = { 547 "ARRAY": exp.Array, 548 "LIST": exp.List, 549 } 550 551 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 552 553 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 554 555 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 556 557 FUNC_TOKENS = { 558 TokenType.COLLATE, 559 TokenType.COMMAND, 560 TokenType.CURRENT_DATE, 561 TokenType.CURRENT_DATETIME, 562 TokenType.CURRENT_TIMESTAMP, 563 TokenType.CURRENT_TIME, 564 TokenType.CURRENT_USER, 565 TokenType.FILTER, 566 TokenType.FIRST, 567 TokenType.FORMAT, 568 TokenType.GLOB, 569 TokenType.IDENTIFIER, 570 TokenType.INDEX, 571 TokenType.ISNULL, 572 TokenType.ILIKE, 573 TokenType.INSERT, 574 TokenType.LIKE, 575 TokenType.MERGE, 576 TokenType.OFFSET, 577 TokenType.PRIMARY_KEY, 578 TokenType.RANGE, 579 TokenType.REPLACE, 580 TokenType.RLIKE, 581 TokenType.ROW, 582 TokenType.UNNEST, 583 TokenType.VAR, 584 TokenType.LEFT, 585 TokenType.RIGHT, 586 TokenType.SEQUENCE, 587 TokenType.DATE, 588 TokenType.DATETIME, 589 TokenType.TABLE, 590 TokenType.TIMESTAMP, 591 TokenType.TIMESTAMPTZ, 592 TokenType.TRUNCATE, 593 TokenType.WINDOW, 594 TokenType.XOR, 595 *TYPE_TOKENS, 596 *SUBQUERY_PREDICATES, 597 } 598 599 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 600 TokenType.AND: exp.And, 601 } 602 603 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 604 TokenType.COLON_EQ: exp.PropertyEQ, 605 } 606 607 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 608 TokenType.OR: exp.Or, 609 } 610 611 EQUALITY = { 612 TokenType.EQ: exp.EQ, 613 TokenType.NEQ: exp.NEQ, 614 TokenType.NULLSAFE_EQ: 
exp.NullSafeEQ, 615 } 616 617 COMPARISON = { 618 TokenType.GT: exp.GT, 619 TokenType.GTE: exp.GTE, 620 TokenType.LT: exp.LT, 621 TokenType.LTE: exp.LTE, 622 } 623 624 BITWISE = { 625 TokenType.AMP: exp.BitwiseAnd, 626 TokenType.CARET: exp.BitwiseXor, 627 TokenType.PIPE: exp.BitwiseOr, 628 } 629 630 TERM = { 631 TokenType.DASH: exp.Sub, 632 TokenType.PLUS: exp.Add, 633 TokenType.MOD: exp.Mod, 634 TokenType.COLLATE: exp.Collate, 635 } 636 637 FACTOR = { 638 TokenType.DIV: exp.IntDiv, 639 TokenType.LR_ARROW: exp.Distance, 640 TokenType.SLASH: exp.Div, 641 TokenType.STAR: exp.Mul, 642 } 643 644 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 645 646 TIMES = { 647 TokenType.TIME, 648 TokenType.TIMETZ, 649 } 650 651 TIMESTAMPS = { 652 TokenType.TIMESTAMP, 653 TokenType.TIMESTAMPTZ, 654 TokenType.TIMESTAMPLTZ, 655 *TIMES, 656 } 657 658 SET_OPERATIONS = { 659 TokenType.UNION, 660 TokenType.INTERSECT, 661 TokenType.EXCEPT, 662 } 663 664 JOIN_METHODS = { 665 TokenType.ASOF, 666 TokenType.NATURAL, 667 TokenType.POSITIONAL, 668 } 669 670 JOIN_SIDES = { 671 TokenType.LEFT, 672 TokenType.RIGHT, 673 TokenType.FULL, 674 } 675 676 JOIN_KINDS = { 677 TokenType.ANTI, 678 TokenType.CROSS, 679 TokenType.INNER, 680 TokenType.OUTER, 681 TokenType.SEMI, 682 TokenType.STRAIGHT_JOIN, 683 } 684 685 JOIN_HINTS: t.Set[str] = set() 686 687 LAMBDAS = { 688 TokenType.ARROW: lambda self, expressions: self.expression( 689 exp.Lambda, 690 this=self._replace_lambda( 691 self._parse_assignment(), 692 expressions, 693 ), 694 expressions=expressions, 695 ), 696 TokenType.FARROW: lambda self, expressions: self.expression( 697 exp.Kwarg, 698 this=exp.var(expressions[0].name), 699 expression=self._parse_assignment(), 700 ), 701 } 702 703 COLUMN_OPERATORS = { 704 TokenType.DOT: None, 705 TokenType.DCOLON: lambda self, this, to: self.expression( 706 exp.Cast if self.STRICT_CAST else exp.TryCast, 707 this=this, 708 to=to, 709 ), 710 TokenType.ARROW: lambda self, this, path: self.expression( 711 
exp.JSONExtract, 712 this=this, 713 expression=self.dialect.to_json_path(path), 714 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 715 ), 716 TokenType.DARROW: lambda self, this, path: self.expression( 717 exp.JSONExtractScalar, 718 this=this, 719 expression=self.dialect.to_json_path(path), 720 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 721 ), 722 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 723 exp.JSONBExtract, 724 this=this, 725 expression=path, 726 ), 727 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 728 exp.JSONBExtractScalar, 729 this=this, 730 expression=path, 731 ), 732 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 733 exp.JSONBContains, 734 this=this, 735 expression=key, 736 ), 737 } 738 739 EXPRESSION_PARSERS = { 740 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 741 exp.Column: lambda self: self._parse_column(), 742 exp.Condition: lambda self: self._parse_assignment(), 743 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 744 exp.Expression: lambda self: self._parse_expression(), 745 exp.From: lambda self: self._parse_from(joins=True), 746 exp.Group: lambda self: self._parse_group(), 747 exp.Having: lambda self: self._parse_having(), 748 exp.Identifier: lambda self: self._parse_id_var(), 749 exp.Join: lambda self: self._parse_join(), 750 exp.Lambda: lambda self: self._parse_lambda(), 751 exp.Lateral: lambda self: self._parse_lateral(), 752 exp.Limit: lambda self: self._parse_limit(), 753 exp.Offset: lambda self: self._parse_offset(), 754 exp.Order: lambda self: self._parse_order(), 755 exp.Ordered: lambda self: self._parse_ordered(), 756 exp.Properties: lambda self: self._parse_properties(), 757 exp.Qualify: lambda self: self._parse_qualify(), 758 exp.Returning: lambda self: self._parse_returning(), 759 exp.Select: lambda self: self._parse_select(), 760 exp.Sort: lambda self: self._parse_sort(exp.Sort, 
TokenType.SORT_BY), 761 exp.Table: lambda self: self._parse_table_parts(), 762 exp.TableAlias: lambda self: self._parse_table_alias(), 763 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 764 exp.Where: lambda self: self._parse_where(), 765 exp.Window: lambda self: self._parse_named_window(), 766 exp.With: lambda self: self._parse_with(), 767 "JOIN_TYPE": lambda self: self._parse_join_parts(), 768 } 769 770 STATEMENT_PARSERS = { 771 TokenType.ALTER: lambda self: self._parse_alter(), 772 TokenType.BEGIN: lambda self: self._parse_transaction(), 773 TokenType.CACHE: lambda self: self._parse_cache(), 774 TokenType.COMMENT: lambda self: self._parse_comment(), 775 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 776 TokenType.COPY: lambda self: self._parse_copy(), 777 TokenType.CREATE: lambda self: self._parse_create(), 778 TokenType.DELETE: lambda self: self._parse_delete(), 779 TokenType.DESC: lambda self: self._parse_describe(), 780 TokenType.DESCRIBE: lambda self: self._parse_describe(), 781 TokenType.DROP: lambda self: self._parse_drop(), 782 TokenType.INSERT: lambda self: self._parse_insert(), 783 TokenType.KILL: lambda self: self._parse_kill(), 784 TokenType.LOAD: lambda self: self._parse_load(), 785 TokenType.MERGE: lambda self: self._parse_merge(), 786 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 787 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 788 TokenType.REFRESH: lambda self: self._parse_refresh(), 789 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 790 TokenType.SET: lambda self: self._parse_set(), 791 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 792 TokenType.UNCACHE: lambda self: self._parse_uncache(), 793 TokenType.UPDATE: lambda self: self._parse_update(), 794 TokenType.USE: lambda self: self.expression( 795 exp.Use, 796 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 797 
this=self._parse_table(schema=False), 798 ), 799 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 800 } 801 802 UNARY_PARSERS = { 803 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 804 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 805 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 806 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 807 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 808 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 809 } 810 811 STRING_PARSERS = { 812 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 813 exp.RawString, this=token.text 814 ), 815 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 816 exp.National, this=token.text 817 ), 818 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 819 TokenType.STRING: lambda self, token: self.expression( 820 exp.Literal, this=token.text, is_string=True 821 ), 822 TokenType.UNICODE_STRING: lambda self, token: self.expression( 823 exp.UnicodeString, 824 this=token.text, 825 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 826 ), 827 } 828 829 NUMERIC_PARSERS = { 830 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 831 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 832 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 833 TokenType.NUMBER: lambda self, token: self.expression( 834 exp.Literal, this=token.text, is_string=False 835 ), 836 } 837 838 PRIMARY_PARSERS = { 839 **STRING_PARSERS, 840 **NUMERIC_PARSERS, 841 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 842 TokenType.NULL: lambda self, _: self.expression(exp.Null), 843 
TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 844 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 845 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 846 TokenType.STAR: lambda self, _: self._parse_star_ops(), 847 } 848 849 PLACEHOLDER_PARSERS = { 850 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 851 TokenType.PARAMETER: lambda self: self._parse_parameter(), 852 TokenType.COLON: lambda self: ( 853 self.expression(exp.Placeholder, this=self._prev.text) 854 if self._match_set(self.ID_VAR_TOKENS) 855 else None 856 ), 857 } 858 859 RANGE_PARSERS = { 860 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 861 TokenType.GLOB: binary_range_parser(exp.Glob), 862 TokenType.ILIKE: binary_range_parser(exp.ILike), 863 TokenType.IN: lambda self, this: self._parse_in(this), 864 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 865 TokenType.IS: lambda self, this: self._parse_is(this), 866 TokenType.LIKE: binary_range_parser(exp.Like), 867 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 868 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 869 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 870 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 871 } 872 873 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 874 "ALLOWED_VALUES": lambda self: self.expression( 875 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 876 ), 877 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 878 "AUTO": lambda self: self._parse_auto_property(), 879 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 880 "BACKUP": lambda self: self.expression( 881 exp.BackupProperty, this=self._parse_var(any_token=True) 882 ), 883 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 884 "CHARSET": lambda self, **kwargs: 
self._parse_character_set(**kwargs), 885 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 886 "CHECKSUM": lambda self: self._parse_checksum(), 887 "CLUSTER BY": lambda self: self._parse_cluster(), 888 "CLUSTERED": lambda self: self._parse_clustered_by(), 889 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 890 exp.CollateProperty, **kwargs 891 ), 892 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 893 "CONTAINS": lambda self: self._parse_contains_property(), 894 "COPY": lambda self: self._parse_copy_property(), 895 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 896 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 897 "DEFINER": lambda self: self._parse_definer(), 898 "DETERMINISTIC": lambda self: self.expression( 899 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 900 ), 901 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 902 "DUPLICATE": lambda self: self._parse_duplicate(), 903 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 904 "DISTKEY": lambda self: self._parse_distkey(), 905 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 906 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 907 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 908 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 909 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 910 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 911 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 912 "FREESPACE": lambda self: self._parse_freespace(), 913 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 914 "HEAP": lambda self: self.expression(exp.HeapProperty), 915 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 916 "IMMUTABLE": lambda self: self.expression( 917 
exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 918 ), 919 "INHERITS": lambda self: self.expression( 920 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 921 ), 922 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 923 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 924 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 925 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 926 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 927 "LIKE": lambda self: self._parse_create_like(), 928 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 929 "LOCK": lambda self: self._parse_locking(), 930 "LOCKING": lambda self: self._parse_locking(), 931 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 932 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 933 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 934 "MODIFIES": lambda self: self._parse_modifies_property(), 935 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 936 "NO": lambda self: self._parse_no_property(), 937 "ON": lambda self: self._parse_on_property(), 938 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 939 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 940 "PARTITION": lambda self: self._parse_partitioned_of(), 941 "PARTITION BY": lambda self: self._parse_partitioned_by(), 942 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 943 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 944 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 945 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 946 "READS": lambda self: self._parse_reads_property(), 947 "REMOTE": lambda self: self._parse_remote_with_connection(), 948 "RETURNS": lambda self: 
self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Dispatch table: upper-cased constraint keyword -> parser for the column/table
    # constraint it introduces. The keyword has already been consumed when the
    # lambda runs, so each parser only consumes the constraint's arguments.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        # A bare NULL constraint is represented as NotNullColumnConstraint(allow_null=True)
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Dispatch table: keyword following ALTER TABLE <name> -> parser for that action.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    # Dispatch table for the keyword following ALTER TABLE ... ALTER.
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraints that may appear in a schema definition without a preceding name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
1073 "PRIMARY KEY", 1074 "UNIQUE", 1075 } 1076 1077 NO_PAREN_FUNCTION_PARSERS = { 1078 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1079 "CASE": lambda self: self._parse_case(), 1080 "CONNECT_BY_ROOT": lambda self: self.expression( 1081 exp.ConnectByRoot, this=self._parse_column() 1082 ), 1083 "IF": lambda self: self._parse_if(), 1084 "NEXT": lambda self: self._parse_next_value_for(), 1085 } 1086 1087 INVALID_FUNC_NAME_TOKENS = { 1088 TokenType.IDENTIFIER, 1089 TokenType.STRING, 1090 } 1091 1092 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1093 1094 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1095 1096 FUNCTION_PARSERS = { 1097 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1098 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1099 "DECODE": lambda self: self._parse_decode(), 1100 "EXTRACT": lambda self: self._parse_extract(), 1101 "GAP_FILL": lambda self: self._parse_gap_fill(), 1102 "JSON_OBJECT": lambda self: self._parse_json_object(), 1103 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1104 "JSON_TABLE": lambda self: self._parse_json_table(), 1105 "MATCH": lambda self: self._parse_match_against(), 1106 "NORMALIZE": lambda self: self._parse_normalize(), 1107 "OPENJSON": lambda self: self._parse_open_json(), 1108 "POSITION": lambda self: self._parse_position(), 1109 "PREDICT": lambda self: self._parse_predict(), 1110 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1111 "STRING_AGG": lambda self: self._parse_string_agg(), 1112 "SUBSTRING": lambda self: self._parse_substring(), 1113 "TRIM": lambda self: self._parse_trim(), 1114 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1115 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1116 } 1117 1118 QUERY_MODIFIER_PARSERS = { 1119 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1120 TokenType.PREWHERE: lambda self: ("prewhere", 
self._parse_prewhere()), 1121 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1122 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1123 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1124 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1125 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1126 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1127 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1128 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1129 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1130 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1131 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1132 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1133 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1134 TokenType.CLUSTER_BY: lambda self: ( 1135 "cluster", 1136 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1137 ), 1138 TokenType.DISTRIBUTE_BY: lambda self: ( 1139 "distribute", 1140 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1141 ), 1142 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1143 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1144 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1145 } 1146 1147 SET_PARSERS = { 1148 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1149 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1150 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1151 "TRANSACTION": lambda self: self._parse_set_transaction(), 1152 } 1153 1154 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1155 1156 TYPE_LITERAL_PARSERS = { 1157 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, 
this=this), 1158 } 1159 1160 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1161 1162 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1163 1164 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1165 1166 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1167 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1168 "ISOLATION": ( 1169 ("LEVEL", "REPEATABLE", "READ"), 1170 ("LEVEL", "READ", "COMMITTED"), 1171 ("LEVEL", "READ", "UNCOMITTED"), 1172 ("LEVEL", "SERIALIZABLE"), 1173 ), 1174 "READ": ("WRITE", "ONLY"), 1175 } 1176 1177 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1178 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1179 ) 1180 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1181 1182 CREATE_SEQUENCE: OPTIONS_TYPE = { 1183 "SCALE": ("EXTEND", "NOEXTEND"), 1184 "SHARD": ("EXTEND", "NOEXTEND"), 1185 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1186 **dict.fromkeys( 1187 ( 1188 "SESSION", 1189 "GLOBAL", 1190 "KEEP", 1191 "NOKEEP", 1192 "ORDER", 1193 "NOORDER", 1194 "NOCACHE", 1195 "CYCLE", 1196 "NOCYCLE", 1197 "NOMINVALUE", 1198 "NOMAXVALUE", 1199 "NOSCALE", 1200 "NOSHARD", 1201 ), 1202 tuple(), 1203 ), 1204 } 1205 1206 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1207 1208 USABLES: OPTIONS_TYPE = dict.fromkeys( 1209 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1210 ) 1211 1212 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1213 1214 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1215 "TYPE": ("EVOLUTION",), 1216 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1217 } 1218 1219 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1220 "NOT": ("ENFORCED",), 1221 "MATCH": ( 1222 "FULL", 1223 "PARTIAL", 1224 "SIMPLE", 1225 ), 1226 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1227 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1228 } 1229 1230 
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC escape prefixes, e.g. {d '2024-01-01'} -> exp.Date
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    # Whether CAST (as opposed to TRY_CAST) semantics apply by default
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Imported lazily to avoid a circular import with sqlglot.dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all per-parse state (token stream, cursor and collected errors)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # None of the candidate types parsed successfully; surface all failures
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream into statements at semicolons and parses each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Keep a semicolon that carries comments so they aren't lost
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers any pending comments from the previous token to the expression
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanned by the two tokens
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the source text
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward (or backward, for negative deltas via _retreat)
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds the cursor to an earlier (or later) absolute index
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Wraps the remaining statement as an opaque Command node."""
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <target> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to an opaque Command
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level entry point: dispatches on the current token to a statement parser."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only when fully matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        # T-SQL columnstore index flavors
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
        elif create_token.token_type == TokenType.VIEW:
            if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Trailing unconsumed tokens mean unsupported syntax: fall back to a Command
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses CREATE SEQUENCE options; returns None if no option was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier flags that were actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single table/DDL property, falling back to `key = value` form."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS [INPUTFORMAT ... OUTPUTFORMAT ...] | <format>."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Unquoted identifiers are normalized to exp.Var
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self,
exp_class: t.Type[E], **kwargs: t.Any) -> E: 2033 self._match(TokenType.EQ) 2034 self._match(TokenType.ALIAS) 2035 2036 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2037 2038 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2039 properties = [] 2040 while True: 2041 if before: 2042 prop = self._parse_property_before() 2043 else: 2044 prop = self._parse_property() 2045 if not prop: 2046 break 2047 for p in ensure_list(prop): 2048 properties.append(p) 2049 2050 if properties: 2051 return self.expression(exp.Properties, expressions=properties) 2052 2053 return None 2054 2055 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2056 return self.expression( 2057 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2058 ) 2059 2060 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2061 if self._match_texts(("DEFINER", "INVOKER")): 2062 security_specifier = self._prev.text.upper() 2063 return self.expression(exp.SecurityProperty, this=security_specifier) 2064 return None 2065 2066 def _parse_settings_property(self) -> exp.SettingsProperty: 2067 return self.expression( 2068 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2069 ) 2070 2071 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2072 if self._index >= 2: 2073 pre_volatile_token = self._tokens[self._index - 2] 2074 else: 2075 pre_volatile_token = None 2076 2077 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2078 return exp.VolatileProperty() 2079 2080 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2081 2082 def _parse_retention_period(self) -> exp.Var: 2083 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2084 number = self._parse_number() 2085 number_str = f"{number} " if number else "" 2086 unit = 
self._parse_var(any_token=True) 2087 return exp.var(f"{number_str}{unit}") 2088 2089 def _parse_system_versioning_property( 2090 self, with_: bool = False 2091 ) -> exp.WithSystemVersioningProperty: 2092 self._match(TokenType.EQ) 2093 prop = self.expression( 2094 exp.WithSystemVersioningProperty, 2095 **{ # type: ignore 2096 "on": True, 2097 "with": with_, 2098 }, 2099 ) 2100 2101 if self._match_text_seq("OFF"): 2102 prop.set("on", False) 2103 return prop 2104 2105 self._match(TokenType.ON) 2106 if self._match(TokenType.L_PAREN): 2107 while self._curr and not self._match(TokenType.R_PAREN): 2108 if self._match_text_seq("HISTORY_TABLE", "="): 2109 prop.set("this", self._parse_table_parts()) 2110 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2111 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2112 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2113 prop.set("retention_period", self._parse_retention_period()) 2114 2115 self._match(TokenType.COMMA) 2116 2117 return prop 2118 2119 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2120 self._match(TokenType.EQ) 2121 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2122 prop = self.expression(exp.DataDeletionProperty, on=on) 2123 2124 if self._match(TokenType.L_PAREN): 2125 while self._curr and not self._match(TokenType.R_PAREN): 2126 if self._match_text_seq("FILTER_COLUMN", "="): 2127 prop.set("filter_column", self._parse_column()) 2128 elif self._match_text_seq("RETENTION_PERIOD", "="): 2129 prop.set("retention_period", self._parse_retention_period()) 2130 2131 self._match(TokenType.COMMA) 2132 2133 return prop 2134 2135 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2136 kind = "HASH" 2137 expressions: t.Optional[t.List[exp.Expression]] = None 2138 if self._match_text_seq("BY", "HASH"): 2139 expressions = self._parse_wrapped_csv(self._parse_id_var) 2140 elif self._match_text_seq("BY", "RANDOM"): 2141 kind = 
    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following a WITH keyword in a DDL statement.

        May return a single property, a list of wrapped properties, or None.
        The alternatives are mutually exclusive and must be tried in this order.
        """
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            # T-SQL: WITH (SYSTEM_VERSIONING = ON (...))
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            # WITH (prop = value, ...) — a parenthesized property list
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            # Nothing follows WITH, so there is no property to parse
            return None

        return self._parse_withisolatedloading()
self._match(TokenType.EQ) 2198 2199 user = self._parse_id_var() 2200 self._match(TokenType.PARAMETER) 2201 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2202 2203 if not user or not host: 2204 return None 2205 2206 return exp.DefinerProperty(this=f"{user}@{host}") 2207 2208 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2209 self._match(TokenType.TABLE) 2210 self._match(TokenType.EQ) 2211 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2212 2213 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2214 return self.expression(exp.LogProperty, no=no) 2215 2216 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2217 return self.expression(exp.JournalProperty, **kwargs) 2218 2219 def _parse_checksum(self) -> exp.ChecksumProperty: 2220 self._match(TokenType.EQ) 2221 2222 on = None 2223 if self._match(TokenType.ON): 2224 on = True 2225 elif self._match_text_seq("OFF"): 2226 on = False 2227 2228 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2229 2230 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2231 return self.expression( 2232 exp.Cluster, 2233 expressions=( 2234 self._parse_wrapped_csv(self._parse_ordered) 2235 if wrapped 2236 else self._parse_csv(self._parse_ordered) 2237 ), 2238 ) 2239 2240 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2241 self._match_text_seq("BY") 2242 2243 self._match_l_paren() 2244 expressions = self._parse_csv(self._parse_column) 2245 self._match_r_paren() 2246 2247 if self._match_text_seq("SORTED", "BY"): 2248 self._match_l_paren() 2249 sorted_by = self._parse_csv(self._parse_ordered) 2250 self._match_r_paren() 2251 else: 2252 sorted_by = None 2253 2254 self._match(TokenType.INTO) 2255 buckets = self._parse_number() 2256 self._match_text_seq("BUCKETS") 2257 2258 return self.expression( 2259 exp.ClusteredByProperty, 2260 expressions=expressions, 2261 
sorted_by=sorted_by, 2262 buckets=buckets, 2263 ) 2264 2265 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2266 if not self._match_text_seq("GRANTS"): 2267 self._retreat(self._index - 1) 2268 return None 2269 2270 return self.expression(exp.CopyGrantsProperty) 2271 2272 def _parse_freespace(self) -> exp.FreespaceProperty: 2273 self._match(TokenType.EQ) 2274 return self.expression( 2275 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2276 ) 2277 2278 def _parse_mergeblockratio( 2279 self, no: bool = False, default: bool = False 2280 ) -> exp.MergeBlockRatioProperty: 2281 if self._match(TokenType.EQ): 2282 return self.expression( 2283 exp.MergeBlockRatioProperty, 2284 this=self._parse_number(), 2285 percent=self._match(TokenType.PERCENT), 2286 ) 2287 2288 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2289 2290 def _parse_datablocksize( 2291 self, 2292 default: t.Optional[bool] = None, 2293 minimum: t.Optional[bool] = None, 2294 maximum: t.Optional[bool] = None, 2295 ) -> exp.DataBlocksizeProperty: 2296 self._match(TokenType.EQ) 2297 size = self._parse_number() 2298 2299 units = None 2300 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2301 units = self._prev.text 2302 2303 return self.expression( 2304 exp.DataBlocksizeProperty, 2305 size=size, 2306 units=units, 2307 default=default, 2308 minimum=minimum, 2309 maximum=maximum, 2310 ) 2311 2312 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2313 self._match(TokenType.EQ) 2314 always = self._match_text_seq("ALWAYS") 2315 manual = self._match_text_seq("MANUAL") 2316 never = self._match_text_seq("NEVER") 2317 default = self._match_text_seq("DEFAULT") 2318 2319 autotemp = None 2320 if self._match_text_seq("AUTOTEMP"): 2321 autotemp = self._parse_schema() 2322 2323 return self.expression( 2324 exp.BlockCompressionProperty, 2325 always=always, 2326 manual=manual, 2327 never=never, 2328 default=default, 
    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata LOCKING clause, e.g. LOCKING TABLE t FOR ACCESS.

        All components are optional; unrecognized parts are left as None.
        """
        # What kind of object the lock applies to
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks do not name a target object
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        # Lock severity; EXCL is normalized to EXCLUSIVE
        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound spec: IN (...), FROM (...) TO (...), or WITH (MODULUS ..., REMAINDER ...).

        Raises a parse error when none of the three supported forms is found.
        """

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE / MAXVALUE are keywords here, not column references
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            # FOR VALUES IN (expr, ...)
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            # FOR VALUES FROM (lo, ...) TO (hi, ...)
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            # FOR VALUES WITH (MODULUS m, REMAINDER r)
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
expression=expression) 2452 2453 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2454 self._match(TokenType.EQ) 2455 return self.expression( 2456 exp.PartitionedByProperty, 2457 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2458 ) 2459 2460 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2461 if self._match_text_seq("AND", "STATISTICS"): 2462 statistics = True 2463 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2464 statistics = False 2465 else: 2466 statistics = None 2467 2468 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2469 2470 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2471 if self._match_text_seq("SQL"): 2472 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2473 return None 2474 2475 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2476 if self._match_text_seq("SQL", "DATA"): 2477 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2478 return None 2479 2480 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2481 if self._match_text_seq("PRIMARY", "INDEX"): 2482 return exp.NoPrimaryIndexProperty() 2483 if self._match_text_seq("SQL"): 2484 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2485 return None 2486 2487 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2488 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2489 return exp.OnCommitProperty() 2490 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2491 return exp.OnCommitProperty(delete=True) 2492 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2493 2494 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2495 if self._match_text_seq("SQL", "DATA"): 2496 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2497 return None 2498 2499 def _parse_distkey(self) -> exp.DistKeyProperty: 2500 
return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2501 2502 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2503 table = self._parse_table(schema=True) 2504 2505 options = [] 2506 while self._match_texts(("INCLUDING", "EXCLUDING")): 2507 this = self._prev.text.upper() 2508 2509 id_var = self._parse_id_var() 2510 if not id_var: 2511 return None 2512 2513 options.append( 2514 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2515 ) 2516 2517 return self.expression(exp.LikeProperty, this=table, expressions=options) 2518 2519 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2520 return self.expression( 2521 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2522 ) 2523 2524 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2525 self._match(TokenType.EQ) 2526 return self.expression( 2527 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2528 ) 2529 2530 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2531 self._match_text_seq("WITH", "CONNECTION") 2532 return self.expression( 2533 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2534 ) 2535 2536 def _parse_returns(self) -> exp.ReturnsProperty: 2537 value: t.Optional[exp.Expression] 2538 null = None 2539 is_table = self._match(TokenType.TABLE) 2540 2541 if is_table: 2542 if self._match(TokenType.LT): 2543 value = self.expression( 2544 exp.Schema, 2545 this="TABLE", 2546 expressions=self._parse_csv(self._parse_struct_types), 2547 ) 2548 if not self._match(TokenType.GT): 2549 self.raise_error("Expecting >") 2550 else: 2551 value = self._parse_schema(exp.var("TABLE")) 2552 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2553 null = True 2554 value = None 2555 else: 2556 value = self._parse_types() 2557 2558 return self.expression(exp.ReturnsProperty, this=value, 
is_table=is_table, null=null) 2559 2560 def _parse_describe(self) -> exp.Describe: 2561 kind = self._match_set(self.CREATABLES) and self._prev.text 2562 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2563 if self._match(TokenType.DOT): 2564 style = None 2565 self._retreat(self._index - 2) 2566 this = self._parse_table(schema=True) 2567 properties = self._parse_properties() 2568 expressions = properties.expressions if properties else None 2569 partition = self._parse_partition() 2570 return self.expression( 2571 exp.Describe, 2572 this=this, 2573 style=style, 2574 kind=kind, 2575 expressions=expressions, 2576 partition=partition, 2577 ) 2578 2579 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2580 kind = self._prev.text.upper() 2581 expressions = [] 2582 2583 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2584 if self._match(TokenType.WHEN): 2585 expression = self._parse_disjunction() 2586 self._match(TokenType.THEN) 2587 else: 2588 expression = None 2589 2590 else_ = self._match(TokenType.ELSE) 2591 2592 if not self._match(TokenType.INTO): 2593 return None 2594 2595 return self.expression( 2596 exp.ConditionalInsert, 2597 this=self.expression( 2598 exp.Insert, 2599 this=self._parse_table(schema=True), 2600 expression=self._parse_derived_table_values(), 2601 ), 2602 expression=expression, 2603 else_=else_, 2604 ) 2605 2606 expression = parse_conditional_insert() 2607 while expression is not None: 2608 expressions.append(expression) 2609 expression = parse_conditional_insert() 2610 2611 return self.expression( 2612 exp.MultitableInserts, 2613 kind=kind, 2614 comments=comments, 2615 expressions=expressions, 2616 source=self._parse_table(), 2617 ) 2618 2619 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2620 comments = ensure_list(self._prev_comments) 2621 hint = self._parse_hint() 2622 overwrite = self._match(TokenType.OVERWRITE) 
2623 ignore = self._match(TokenType.IGNORE) 2624 local = self._match_text_seq("LOCAL") 2625 alternative = None 2626 is_function = None 2627 2628 if self._match_text_seq("DIRECTORY"): 2629 this: t.Optional[exp.Expression] = self.expression( 2630 exp.Directory, 2631 this=self._parse_var_or_string(), 2632 local=local, 2633 row_format=self._parse_row_format(match_row=True), 2634 ) 2635 else: 2636 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2637 comments += ensure_list(self._prev_comments) 2638 return self._parse_multitable_inserts(comments) 2639 2640 if self._match(TokenType.OR): 2641 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2642 2643 self._match(TokenType.INTO) 2644 comments += ensure_list(self._prev_comments) 2645 self._match(TokenType.TABLE) 2646 is_function = self._match(TokenType.FUNCTION) 2647 2648 this = ( 2649 self._parse_table(schema=True, parse_partition=True) 2650 if not is_function 2651 else self._parse_function() 2652 ) 2653 2654 returning = self._parse_returning() 2655 2656 return self.expression( 2657 exp.Insert, 2658 comments=comments, 2659 hint=hint, 2660 is_function=is_function, 2661 this=this, 2662 stored=self._match_text_seq("STORED") and self._parse_stored(), 2663 by_name=self._match_text_seq("BY", "NAME"), 2664 exists=self._parse_exists(), 2665 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2666 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2667 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2668 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2669 conflict=self._parse_on_conflict(), 2670 returning=returning or self._parse_returning(), 2671 overwrite=overwrite, 2672 alternative=alternative, 2673 ignore=ignore, 2674 source=self._match(TokenType.TABLE) and self._parse_table(), 2675 ) 2676 2677 def _parse_kill(self) -> exp.Kill: 2678 kind = exp.var(self._prev.text) if 
self._match_texts(("CONNECTION", "QUERY")) else None 2679 2680 return self.expression( 2681 exp.Kill, 2682 this=self._parse_primary(), 2683 kind=kind, 2684 ) 2685 2686 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2687 conflict = self._match_text_seq("ON", "CONFLICT") 2688 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2689 2690 if not conflict and not duplicate: 2691 return None 2692 2693 conflict_keys = None 2694 constraint = None 2695 2696 if conflict: 2697 if self._match_text_seq("ON", "CONSTRAINT"): 2698 constraint = self._parse_id_var() 2699 elif self._match(TokenType.L_PAREN): 2700 conflict_keys = self._parse_csv(self._parse_id_var) 2701 self._match_r_paren() 2702 2703 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2704 if self._prev.token_type == TokenType.UPDATE: 2705 self._match(TokenType.SET) 2706 expressions = self._parse_csv(self._parse_equality) 2707 else: 2708 expressions = None 2709 2710 return self.expression( 2711 exp.OnConflict, 2712 duplicate=duplicate, 2713 expressions=expressions, 2714 action=action, 2715 conflict_keys=conflict_keys, 2716 constraint=constraint, 2717 ) 2718 2719 def _parse_returning(self) -> t.Optional[exp.Returning]: 2720 if not self._match(TokenType.RETURNING): 2721 return None 2722 return self.expression( 2723 exp.Returning, 2724 expressions=self._parse_csv(self._parse_expression), 2725 into=self._match(TokenType.INTO) and self._parse_table_part(), 2726 ) 2727 2728 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2729 if not self._match(TokenType.FORMAT): 2730 return None 2731 return self._parse_row_format() 2732 2733 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2734 index = self._index 2735 with_ = with_ or self._match_text_seq("WITH") 2736 2737 if not self._match(TokenType.SERDE_PROPERTIES): 2738 self._retreat(index) 2739 return None 2740 return self.expression( 2741 exp.SerdeProperties, 
2742 **{ # type: ignore 2743 "expressions": self._parse_wrapped_properties(), 2744 "with": with_, 2745 }, 2746 ) 2747 2748 def _parse_row_format( 2749 self, match_row: bool = False 2750 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2751 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2752 return None 2753 2754 if self._match_text_seq("SERDE"): 2755 this = self._parse_string() 2756 2757 serde_properties = self._parse_serde_properties() 2758 2759 return self.expression( 2760 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2761 ) 2762 2763 self._match_text_seq("DELIMITED") 2764 2765 kwargs = {} 2766 2767 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2768 kwargs["fields"] = self._parse_string() 2769 if self._match_text_seq("ESCAPED", "BY"): 2770 kwargs["escaped"] = self._parse_string() 2771 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2772 kwargs["collection_items"] = self._parse_string() 2773 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2774 kwargs["map_keys"] = self._parse_string() 2775 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2776 kwargs["lines"] = self._parse_string() 2777 if self._match_text_seq("NULL", "DEFINED", "AS"): 2778 kwargs["null"] = self._parse_string() 2779 2780 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2781 2782 def _parse_load(self) -> exp.LoadData | exp.Command: 2783 if self._match_text_seq("DATA"): 2784 local = self._match_text_seq("LOCAL") 2785 self._match_text_seq("INPATH") 2786 inpath = self._parse_string() 2787 overwrite = self._match(TokenType.OVERWRITE) 2788 self._match_pair(TokenType.INTO, TokenType.TABLE) 2789 2790 return self.expression( 2791 exp.LoadData, 2792 this=self._parse_table(schema=True), 2793 local=local, 2794 overwrite=overwrite, 2795 inpath=inpath, 2796 partition=self._parse_partition(), 2797 input_format=self._match_text_seq("INPUTFORMAT") and 
self._parse_string(), 2798 serde=self._match_text_seq("SERDE") and self._parse_string(), 2799 ) 2800 return self._parse_as_command(self._prev) 2801 2802 def _parse_delete(self) -> exp.Delete: 2803 # This handles MySQL's "Multiple-Table Syntax" 2804 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2805 tables = None 2806 comments = self._prev_comments 2807 if not self._match(TokenType.FROM, advance=False): 2808 tables = self._parse_csv(self._parse_table) or None 2809 2810 returning = self._parse_returning() 2811 2812 return self.expression( 2813 exp.Delete, 2814 comments=comments, 2815 tables=tables, 2816 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2817 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2818 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2819 where=self._parse_where(), 2820 returning=returning or self._parse_returning(), 2821 limit=self._parse_limit(), 2822 ) 2823 2824 def _parse_update(self) -> exp.Update: 2825 comments = self._prev_comments 2826 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2827 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2828 returning = self._parse_returning() 2829 return self.expression( 2830 exp.Update, 2831 comments=comments, 2832 **{ # type: ignore 2833 "this": this, 2834 "expressions": expressions, 2835 "from": self._parse_from(joins=True), 2836 "where": self._parse_where(), 2837 "returning": returning or self._parse_returning(), 2838 "order": self._parse_order(), 2839 "limit": self._parse_limit(), 2840 }, 2841 ) 2842 2843 def _parse_uncache(self) -> exp.Uncache: 2844 if not self._match(TokenType.TABLE): 2845 self.raise_error("Expecting TABLE after UNCACHE") 2846 2847 return self.expression( 2848 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2849 ) 2850 2851 def _parse_cache(self) -> exp.Cache: 2852 lazy = self._match_text_seq("LAZY") 2853 
self._match(TokenType.TABLE) 2854 table = self._parse_table(schema=True) 2855 2856 options = [] 2857 if self._match_text_seq("OPTIONS"): 2858 self._match_l_paren() 2859 k = self._parse_string() 2860 self._match(TokenType.EQ) 2861 v = self._parse_string() 2862 options = [k, v] 2863 self._match_r_paren() 2864 2865 self._match(TokenType.ALIAS) 2866 return self.expression( 2867 exp.Cache, 2868 this=table, 2869 lazy=lazy, 2870 options=options, 2871 expression=self._parse_select(nested=True), 2872 ) 2873 2874 def _parse_partition(self) -> t.Optional[exp.Partition]: 2875 if not self._match(TokenType.PARTITION): 2876 return None 2877 2878 return self.expression( 2879 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2880 ) 2881 2882 def _parse_value(self) -> t.Optional[exp.Tuple]: 2883 if self._match(TokenType.L_PAREN): 2884 expressions = self._parse_csv(self._parse_expression) 2885 self._match_r_paren() 2886 return self.expression(exp.Tuple, expressions=expressions) 2887 2888 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
def _parse_value(self) -> t.Optional[exp.Tuple]:
    """Parse a single VALUES row into an exp.Tuple, or None if nothing matches."""
    if not self._match(TokenType.L_PAREN):
        # In some dialects we can have VALUES 1, 2 which results in 1 column
        # & 2 rows, so a bare expression still produces a one-item tuple.
        single = self._parse_expression()
        return self.expression(exp.Tuple, expressions=[single]) if single else None

    items = self._parse_csv(self._parse_expression)
    self._match_r_paren()
    return self.expression(exp.Tuple, expressions=items)

def _parse_projections(self) -> t.List[exp.Expression]:
    """Parse the SELECT projection list (overridable hook for dialects)."""
    return self._parse_expressions()
def _parse_select(
    self,
    nested: bool = False,
    table: bool = False,
    parse_subquery_alias: bool = True,
    parse_set_operation: bool = True,
) -> t.Optional[exp.Expression]:
    """Parse a SELECT-like query.

    Also accepts several SELECT-position alternatives: a leading CTE, a
    parenthesized subquery (when `nested`/`table`), VALUES, a bare leading
    FROM, SUMMARIZE, DESCRIBE, and STREAM. Returns None if no query is found.
    """
    cte = self._parse_with()

    if cte:
        this = self._parse_statement()

        if not this:
            self.raise_error("Failed to parse any statement following CTE")
            return cte

        if "with" in this.arg_types:
            this.set("with", cte)
        else:
            # The statement after WITH cannot carry a CTE; report it but fall
            # back to returning the CTE itself so parsing can continue.
            self.raise_error(f"{this.key} does not support CTE")
            this = cte

        return this

    # duckdb supports leading with FROM x
    from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

    if self._match(TokenType.SELECT):
        comments = self._prev_comments

        hint = self._parse_hint()

        # Don't consume ALL/DISTINCT if the next token starts a dotted name
        # (e.g. a column reference like `all.x`).
        if self._next and not self._next.token_type == TokenType.DOT:
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)
        else:
            all_, distinct = None, None

        # SELECT AS STRUCT / SELECT AS VALUE
        kind = (
            self._match(TokenType.ALIAS)
            and self._match_texts(("STRUCT", "VALUE"))
            and self._prev.text.upper()
        )

        if distinct:
            distinct = self.expression(
                exp.Distinct,
                on=self._parse_value() if self._match(TokenType.ON) else None,
            )

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        # TOP-style limit appears before the projections.
        limit = self._parse_limit(top=True)
        projections = self._parse_projections()

        this = self.expression(
            exp.Select,
            kind=kind,
            hint=hint,
            distinct=distinct,
            expressions=projections,
            limit=limit,
        )
        this.comments = comments

        into = self._parse_into()
        if into:
            this.set("into", into)

        if not from_:
            from_ = self._parse_from()

        if from_:
            this.set("from", from_)

        this = self._parse_query_modifiers(this)
    elif (table or nested) and self._match(TokenType.L_PAREN):
        if self._match(TokenType.PIVOT):
            this = self._parse_simplified_pivot()
        elif self._match(TokenType.FROM):
            this = exp.select("*").from_(
                t.cast(exp.From, self._parse_from(skip_from_token=True))
            )
        else:
            this = (
                self._parse_table()
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )
            this = self._parse_query_modifiers(self._parse_set_operations(this))

        self._match_r_paren()

        # We return early here so that the UNION isn't attached to the subquery by the
        # following call to _parse_set_operations, but instead becomes the parent node
        return self._parse_subquery(this, parse_alias=parse_subquery_alias)
    elif self._match(TokenType.VALUES, advance=False):
        this = self._parse_derived_table_values()
    elif from_:
        this = exp.select("*").from_(from_.this, copy=False)
    elif self._match(TokenType.SUMMARIZE):
        table = self._match(TokenType.TABLE)
        this = self._parse_select() or self._parse_string() or self._parse_table()
        return self.expression(exp.Summarize, this=this, table=table)
    elif self._match(TokenType.DESCRIBE):
        this = self._parse_describe()
    elif self._match_text_seq("STREAM"):
        this = self.expression(exp.Stream, this=self._parse_function())
    else:
        this = None

    return self._parse_set_operations(this) if parse_set_operation else this
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
    """Parse WITH [RECURSIVE] <cte> [, <cte>]... into an exp.With node."""
    if not skip_with_token and not self._match(TokenType.WITH):
        return None

    comments = self._prev_comments
    recursive = self._match(TokenType.RECURSIVE)

    expressions = []
    while True:
        expressions.append(self._parse_cte())

        # Continue while CTEs are separated by a comma or a repeated WITH;
        # a trailing WITH after a comma is also tolerated.
        if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
            break
        else:
            self._match(TokenType.WITH)

    return self.expression(
        exp.With, comments=comments, expressions=expressions, recursive=recursive
    )

def _parse_cte(self) -> exp.CTE:
    """Parse one CTE: <alias> AS [NOT MATERIALIZED | MATERIALIZED] (<statement>)."""
    alias = self._parse_table_alias(self.ID_VAR_TOKENS)
    if not alias or not alias.this:
        self.raise_error("Expected CTE to have alias")

    self._match(TokenType.ALIAS)
    comments = self._prev_comments

    # Tri-state: True/False when (NOT) MATERIALIZED was given, else None.
    if self._match_text_seq("NOT", "MATERIALIZED"):
        materialized = False
    elif self._match_text_seq("MATERIALIZED"):
        materialized = True
    else:
        materialized = None

    return self.expression(
        exp.CTE,
        this=self._parse_wrapped(self._parse_statement),
        alias=alias,
        materialized=materialized,
        comments=comments,
    )

def _parse_table_alias(
    self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
) -> t.Optional[exp.TableAlias]:
    """Parse [AS] <alias> [(<col>, ...)]; returns None when neither part is present."""
    any_token = self._match(TokenType.ALIAS)
    alias = (
        self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        or self._parse_string_as_identifier()
    )

    index = self._index
    if self._match(TokenType.L_PAREN):
        columns = self._parse_csv(self._parse_function_parameter)
        # If the parenthesized list was empty it wasn't a column list —
        # rewind so the L_PAREN can be re-parsed by the caller.
        self._match_r_paren() if columns else self._retreat(index)
    else:
        columns = None

    if not alias and not columns:
        return None

    table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

    # We bubble up comments from the Identifier to the TableAlias
    if isinstance(alias, exp.Identifier):
        table_alias.add_comments(alias.pop_comments())

    return table_alias
def _parse_subquery(
    self, this: t.Optional[exp.Expression], parse_alias: bool = True
) -> t.Optional[exp.Subquery]:
    """Wrap `this` in an exp.Subquery, consuming trailing pivots/alias/sample."""
    if not this:
        return None

    return self.expression(
        exp.Subquery,
        this=this,
        pivots=self._parse_pivots(),
        alias=self._parse_table_alias() if parse_alias else None,
        sample=self._parse_table_sample(),
    )

def _implicit_unnests_to_explicit(self, this: E) -> E:
    """Rewrite implicit unnest joins (e.g. `FROM tbl, tbl.col`) into explicit
    UNNEST(...) nodes.

    A comma-joined table whose first name part refers to a previously seen
    FROM/JOIN alias is treated as a column reference and replaced by
    UNNEST(<column>), preserving any table alias.
    """
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

    # Names visible so far that a later join item could be referencing.
    refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
    for i, join in enumerate(this.args.get("joins") or []):
        table = join.this
        normalized_table = table.copy()
        normalized_table.meta["maybe_column"] = True
        normalized_table = _norm(normalized_table, dialect=self.dialect)

        # Only bare comma joins (no ON condition) can be implicit unnests.
        if isinstance(table, exp.Table) and not join.args.get("on"):
            if normalized_table.parts[0].name in refs:
                table_as_column = table.to_column()
                unnest = exp.Unnest(expressions=[table_as_column])

                # Table.to_column creates a parent Alias node that we want to convert to
                # a TableAlias and attach to the Unnest, so it matches the parser's output
                if isinstance(table.args.get("alias"), exp.TableAlias):
                    table_as_column.replace(table_as_column.this)
                    exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                table.replace(unnest)

        refs.add(normalized_table.alias_or_name)

    return this
def _parse_query_modifiers(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Attach trailing query modifiers (joins, laterals, WHERE/GROUP BY/etc.)
    to a Query or Table node; other nodes are returned untouched."""
    if isinstance(this, (exp.Query, exp.Table)):
        for join in self._parse_joins():
            this.append("joins", join)
        for lateral in iter(self._parse_lateral, None):
            this.append("laterals", lateral)

        while True:
            if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                key, expression = parser(self)

                if expression:
                    this.set(key, expression)
                    if key == "limit":
                        # Normalize LIMIT x, y / LIMIT ... BY into a separate
                        # Offset node carrying the BY expressions.
                        offset = expression.args.pop("offset", None)

                        if offset:
                            offset = exp.Offset(expression=offset)
                            this.set("offset", offset)

                            limit_by_expressions = expression.expressions
                            expression.set("expressions", None)
                            offset.set("expressions", limit_by_expressions)
                continue
            break

    if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
        this = self._implicit_unnests_to_explicit(this)

    return this

def _parse_hint(self) -> t.Optional[exp.Hint]:
    """Parse an optimizer hint comment (/*+ ... */) into an exp.Hint."""
    if self._match(TokenType.HINT):
        hints = []
        # Keep collecting comma-separated hint lists until none are found.
        for hint in iter(
            lambda: self._parse_csv(
                lambda: self._parse_function() or self._parse_var(upper=True)
            ),
            [],
        ):
            hints.extend(hint)

        if not self._match_pair(TokenType.STAR, TokenType.SLASH):
            self.raise_error("Expected */ after HINT")

        return self.expression(exp.Hint, expressions=hints)

    return None

def _parse_into(self) -> t.Optional[exp.Into]:
    """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
    if not self._match(TokenType.INTO):
        return None

    temp = self._match(TokenType.TEMPORARY)
    unlogged = self._match_text_seq("UNLOGGED")
    self._match(TokenType.TABLE)

    return self.expression(
        exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
    )

def _parse_from(
    self, joins: bool = False, skip_from_token: bool = False
) -> t.Optional[exp.From]:
    """Parse a FROM clause; `skip_from_token` assumes FROM was already consumed."""
    if not skip_from_token and not self._match(TokenType.FROM):
        return None

    return self.expression(
        exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
    )

def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
    """Parse one MEASURES item: [FINAL | RUNNING] <expression>."""
    return self.expression(
        exp.MatchRecognizeMeasure,
        window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
        this=self._parse_expression(),
    )
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a MATCH_RECOGNIZE(...) clause into an exp.MatchRecognize node."""
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = (
        self._parse_csv(self._parse_match_recognize_measure)
        if self._match_text_seq("MEASURES")
        else None
    )

    # ROWS PER MATCH options are preserved verbatim as a Var node.
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        text = "ALL ROWS PER MATCH"
        if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
            text += " SHOW EMPTY MATCHES"
        elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
            text += " OMIT EMPTY MATCHES"
        elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
            text += " WITH UNMATCHED ROWS"
        rows = exp.var(text)
    else:
        rows = None

    # AFTER MATCH SKIP options are likewise kept as a raw Var.
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(text)
    else:
        after = None

    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is a regex-like mini-language; rather than parsing it,
        # scan to the matching close paren and keep the raw SQL text.
        paren = 1
        start = self._curr

        while self._curr and paren > 0:
            if self._curr.token_type == TokenType.L_PAREN:
                paren += 1
            if self._curr.token_type == TokenType.R_PAREN:
                paren -= 1

            end = self._prev
            self._advance()

        if paren > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(start, end))
    else:
        pattern = None

    define = (
        self._parse_csv(self._parse_name_as_expression)
        if self._match_text_seq("DEFINE")
        else None
    )

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )
def _parse_lateral(self) -> t.Optional[exp.Lateral]:
    """Parse LATERAL [VIEW] [OUTER] ... or CROSS/OUTER APPLY into an exp.Lateral."""
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
    # Tri-state: True = CROSS APPLY, False = OUTER APPLY, None = not an APPLY.
    if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
        cross_apply = False

    if cross_apply is not None:
        this = self._parse_select(table=True)
        view = None
        outer = None
    elif self._match(TokenType.LATERAL):
        this = self._parse_select(table=True)
        view = self._match(TokenType.VIEW)
        outer = self._match(TokenType.OUTER)
    else:
        return None

    if not this:
        # Not a subquery: fall back to UNNEST(...), a function call, or a
        # plain identifier, possibly dotted (e.g. db.udtf(...)).
        this = (
            self._parse_unnest()
            or self._parse_function()
            or self._parse_id_var(any_token=False)
        )

        while self._match(TokenType.DOT):
            this = exp.Dot(
                this=this,
                expression=self._parse_function() or self._parse_id_var(any_token=False),
            )

    if view:
        table = self._parse_id_var(any_token=False)
        columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
        table_alias: t.Optional[exp.TableAlias] = self.expression(
            exp.TableAlias, this=table, columns=columns
        )
    elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
        # We move the alias from the lateral's child node to the lateral itself
        table_alias = this.args["alias"].pop()
    else:
        table_alias = self._parse_table_alias()

    return self.expression(
        exp.Lateral,
        this=this,
        view=view,
        outer=outer,
        alias=table_alias,
        cross_apply=cross_apply,
    )
def _parse_join_parts(
    self,
) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
    """Consume the optional method/side/kind tokens that may precede JOIN.

    Each element is the matched token, or falsy when that part is absent.
    The three matches must run in this order, since they consume tokens.
    """
    method = self._match_set(self.JOIN_METHODS) and self._prev
    side = self._match_set(self.JOIN_SIDES) and self._prev
    kind = self._match_set(self.JOIN_KINDS) and self._prev
    return method, side, kind

def _parse_using_identifiers(self) -> t.List[exp.Expression]:
    """Parse USING (a, b, ...), unwrapping each Column to its bare identifier."""

    def _unwrap() -> t.Optional[exp.Expression]:
        column = self._parse_column()
        return column.this if isinstance(column, exp.Column) else column

    return self._parse_wrapped_csv(_unwrap, optional=True)
def _parse_join(
    self, skip_join_token: bool = False, parse_bracket: bool = False
) -> t.Optional[exp.Join]:
    """Parse one join clause (including comma joins and CROSS/OUTER APPLY),
    or return None and restore the cursor if no join is present."""
    if self._match(TokenType.COMMA):
        return self.expression(exp.Join, this=self._parse_table())

    index = self._index
    method, side, kind = self._parse_join_parts()
    hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
    # STRAIGHT_JOIN acts as the JOIN keyword itself.
    join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

    if not skip_join_token and not join:
        # The method/side/kind tokens we consumed weren't part of a join.
        self._retreat(index)
        kind = None
        method = None
        side = None

    outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

    if not skip_join_token and not join and not outer_apply and not cross_apply:
        return None

    kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

    if method:
        kwargs["method"] = method.text
    if side:
        kwargs["side"] = side.text
    if kind:
        kwargs["kind"] = kind.text
    if hint:
        kwargs["hint"] = hint

    if self._match(TokenType.MATCH_CONDITION):
        kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

    if self._match(TokenType.ON):
        kwargs["on"] = self._parse_assignment()
    elif self._match(TokenType.USING):
        kwargs["using"] = self._parse_using_identifiers()
    elif (
        not (outer_apply or cross_apply)
        and not isinstance(kwargs["this"], exp.Unnest)
        and not (kind and kind.token_type == TokenType.CROSS)
    ):
        # Try nested joins, e.g. `JOIN (a JOIN b ON ...) ON ...`: if an
        # ON/USING follows them, they belong to the joined table; otherwise
        # rewind and leave them for the caller.
        index = self._index
        joins: t.Optional[list] = list(self._parse_joins())

        if joins and self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif joins and self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        else:
            joins = None
            self._retreat(index)

        kwargs["this"].set("joins", joins if joins else None)

    comments = [c for token in (method, side, kind) if token for c in token.comments]
    return self.expression(exp.Join, comments=comments, **kwargs)

def _parse_opclass(self) -> t.Optional[exp.Expression]:
    """Parse an expression optionally followed by an operator class name."""
    this = self._parse_assignment()

    if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
        return this

    if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
        return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

    return this
def _parse_index_params(self) -> exp.IndexParameters:
    """Parse the trailing parameters of an index definition
    (USING, column list, INCLUDE, PARTITION BY, WITH, TABLESPACE, WHERE, ON)."""
    using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

    if self._match(TokenType.L_PAREN, advance=False):
        columns = self._parse_wrapped_csv(self._parse_with_operator)
    else:
        columns = None

    include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
    partition_by = self._parse_partition_by()
    with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
    tablespace = (
        self._parse_var(any_token=True)
        if self._match_text_seq("USING", "INDEX", "TABLESPACE")
        else None
    )
    where = self._parse_where()

    on = self._parse_field() if self._match(TokenType.ON) else None

    return self.expression(
        exp.IndexParameters,
        using=using,
        columns=columns,
        include=include,
        partition_by=partition_by,
        where=where,
        with_storage=with_storage,
        tablespace=tablespace,
        on=on,
    )

def _parse_index(
    self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
) -> t.Optional[exp.Index]:
    """Parse an index definition into exp.Index.

    When `index` is given or `anonymous` is set, the index name was already
    consumed (or absent) and only `ON [TABLE] <table>` plus params follow;
    otherwise the [UNIQUE|PRIMARY|AMP] INDEX <name> form is expected.
    """
    if index or anonymous:
        unique = None
        primary = None
        amp = None

        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive
        table = self._parse_table_parts(schema=True)
    else:
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")

        if not self._match(TokenType.INDEX):
            return None

        index = self._parse_id_var()
        table = None

    params = self._parse_index_params()

    return self.expression(
        exp.Index,
        this=index,
        table=table,
        unique=unique,
        primary=primary,
        amp=amp,
        params=params,
    )

def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
    """Parse table-level hints; returns None when no hints are present."""
    hints: t.List[exp.Expression] = []
    if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
        # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
        hints.append(
            self.expression(
                exp.WithTableHint,
                expressions=self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(any_token=True)
                ),
            )
        )
        self._match_r_paren()
    else:
        # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
        while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
            hint = exp.IndexTableHint(this=self._prev.text.upper())

            self._match_set((TokenType.INDEX, TokenType.KEY))
            if self._match(TokenType.FOR):
                hint.set("target", self._advance_any() and self._prev.text.upper())

            hint.set("expressions", self._parse_wrapped_id_vars())
            hints.append(hint)

    return hints or None
def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
    """Parse a single dotted-name component of a table reference.

    Function calls are only allowed when not parsing a schema definition.
    """
    return (
        (not schema and self._parse_function(optional_parens=False))
        or self._parse_id_var(any_token=False)
        or self._parse_string_as_identifier()
        or self._parse_placeholder()
    )

def _parse_table_parts(
    self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
) -> exp.Table:
    """Parse a possibly qualified table name (catalog.db.table, with extra
    dots nested into Dot nodes) plus optional CHANGES / AT|BEFORE / pivots."""
    catalog = None
    db = None
    table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

    while self._match(TokenType.DOT):
        if catalog:
            # This allows nesting the table in arbitrarily many dot expressions if needed
            table = self.expression(
                exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
            )
        else:
            catalog = db
            db = table
            # "" used for tsql FROM a..b case
            table = self._parse_table_part(schema=schema) or ""

    if (
        wildcard
        and self._is_connected()
        and (isinstance(table, exp.Identifier) or not table)
        and self._match(TokenType.STAR)
    ):
        # Fold a trailing `*` into the last identifier (e.g. `db.tbl*`).
        if isinstance(table, exp.Identifier):
            table.args["this"] += "*"
        else:
            table = exp.Identifier(this="*")

    # We bubble up comments from the Identifier to the Table
    comments = table.pop_comments() if isinstance(table, exp.Expression) else None

    if is_db_reference:
        # Shift parts left: what we parsed as the table is really the db.
        catalog = db
        db = table
        table = None

    if not table and not is_db_reference:
        self.raise_error(f"Expected table name but got {self._curr}")
    if not db and is_db_reference:
        self.raise_error(f"Expected database name but got {self._curr}")

    table = self.expression(
        exp.Table,
        comments=comments,
        this=table,
        db=db,
        catalog=catalog,
    )

    changes = self._parse_changes()
    if changes:
        table.set("changes", changes)

    at_before = self._parse_historical_data()
    if at_before:
        table.set("when", at_before)

    pivots = self._parse_pivots()
    if pivots:
        table.set("pivots", pivots)

    return table
def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse any table-like source: lateral, unnest, VALUES, subquery, or a
    plain table reference with its version/sample/alias/hints/pivots/joins."""
    lateral = self._parse_lateral()
    if lateral:
        return lateral

    unnest = self._parse_unnest()
    if unnest:
        return unnest

    values = self._parse_derived_table_values()
    if values:
        return values

    subquery = self._parse_select(table=True)
    if subquery:
        if not subquery.args.get("pivots"):
            subquery.set("pivots", self._parse_pivots())
        return subquery

    bracket = parse_bracket and self._parse_bracket(None)
    bracket = self.expression(exp.Table, this=bracket) if bracket else None

    rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
        self._parse_table
    )
    rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

    only = self._match(TokenType.ONLY)

    this = t.cast(
        exp.Expression,
        bracket
        or rows_from
        or self._parse_bracket(
            self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
        ),
    )

    if only:
        this.set("only", only)

    # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
    self._match_text_seq("*")

    parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
    if parse_partition and self._match(TokenType.PARTITION, advance=False):
        this.set("partition", self._parse_partition())

    if schema:
        return self._parse_schema(this=this)

    version = self._parse_version()

    if version:
        this.set("version", version)

    # Some dialects put the sample before the alias, others after.
    if self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
    if alias:
        this.set("alias", alias)

    if isinstance(this, exp.Table) and self._match_text_seq("AT"):
        return self.expression(
            exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
        )

    this.set("hints", self._parse_table_hints())

    if not this.args.get("pivots"):
        this.set("pivots", self._parse_pivots())

    if not self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    if joins:
        for join in self._parse_joins():
            this.append("joins", join)

    if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
        this.set("ordinality", True)
        this.set("alias", self._parse_table_alias())

    return this

def _parse_version(self) -> t.Optional[exp.Version]:
    """Parse a temporal/versioned table clause
    (FOR TIMESTAMP/VERSION ... AS OF | FROM/BETWEEN ... | CONTAINED IN | ALL)."""
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        this = "TIMESTAMP"
    elif self._match(TokenType.VERSION_SNAPSHOT):
        this = "VERSION"
    else:
        return None

    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        kind = self._prev.text.upper()
        start = self._parse_bitwise()
        self._match_texts(("TO", "AND"))
        end = self._parse_bitwise()
        expression: t.Optional[exp.Expression] = self.expression(
            exp.Tuple, expressions=[start, end]
        )
    elif self._match_text_seq("CONTAINED", "IN"):
        kind = "CONTAINED IN"
        expression = self.expression(
            exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
        )
    elif self._match(TokenType.ALL):
        kind = "ALL"
        expression = None
    else:
        self._match_text_seq("AS", "OF")
        kind = "AS OF"
        expression = self._parse_type()

    return self.expression(exp.Version, this=this, expression=expression, kind=kind)
def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
    """Parse a time-travel clause, e.g. AT(TIMESTAMP => ...)."""
    # https://docs.snowflake.com/en/sql-reference/constructs/at-before
    index = self._index
    historical_data = None
    if self._match_texts(self.HISTORICAL_DATA_PREFIX):
        this = self._prev.text.upper()
        kind = (
            self._match(TokenType.L_PAREN)
            and self._match_texts(self.HISTORICAL_DATA_KIND)
            and self._prev.text.upper()
        )
        expression = self._match(TokenType.FARROW) and self._parse_bitwise()

        if expression:
            self._match_r_paren()
            historical_data = self.expression(
                exp.HistoricalData, this=this, kind=kind, expression=expression
            )
        else:
            # Not a complete AT/BEFORE clause — rewind everything we consumed.
            self._retreat(index)

    return historical_data

def _parse_changes(self) -> t.Optional[exp.Changes]:
    """Parse CHANGES (INFORMATION => <var>) [AT|BEFORE ...] [END ...]."""
    if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
        return None

    information = self._parse_var(any_token=True)
    self._match_r_paren()

    return self.expression(
        exp.Changes,
        information=information,
        at_before=self._parse_historical_data(),
        end=self._parse_historical_data(),
    )
def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
    """Parse UNNEST(expr, ...) with optional alias and WITH ORDINALITY/OFFSET."""
    if not self._match(TokenType.UNNEST):
        return None

    expressions = self._parse_wrapped_csv(self._parse_equality)
    offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

    alias = self._parse_table_alias() if with_alias else None

    if alias:
        if self.dialect.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        # When there are more column aliases than expressions, the extra
        # trailing alias names the ordinality/offset column.
        columns = alias.args.get("columns") or []
        if offset and len(expressions) < len(columns):
            offset = columns.pop()

    if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
        self._match(TokenType.ALIAS)
        offset = self._parse_id_var(
            any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
        ) or exp.to_identifier("offset")

    return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
    """Parse a VALUES (...) list, optionally parenthesized as a derived table."""
    is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
    if not is_derived and not (
        # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
        self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
    ):
        return None

    expressions = self._parse_csv(self._parse_value)
    alias = self._parse_table_alias()

    if is_derived:
        self._match_r_paren()

    return self.expression(
        exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
    )

def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a TABLESAMPLE / USING SAMPLE clause into exp.TableSample.

    Handles the bucket form (BUCKET x OUT OF y ON col), percent/rows sizes,
    sampling method, and a SEED/REPEATABLE value.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        # Fixed: was a duplicated assignment target
        # (`bucket_denominator = bucket_denominator = ...`), a harmless typo.
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    if self._match(TokenType.L_PAREN):
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample,
        expressions=expressions,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        size=size,
        seed=seed,
    )

def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
    """Collect consecutive PIVOT/UNPIVOT clauses; None when there are none."""
    return list(iter(self._parse_pivot, None)) or None

def _parse_joins(self) -> t.Iterator[exp.Join]:
    """Yield joins until _parse_join returns None."""
    return iter(self._parse_join, None)

# https://duckdb.org/docs/sql/statements/pivot
def _parse_simplified_pivot(self) -> exp.Pivot:
    """Parse DuckDB's simplified PIVOT syntax (ON / USING / GROUP BY)."""

    def _parse_on() -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        return self._parse_in(this) if self._match(TokenType.IN) else this

    this = self._parse_table()
    expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
    using = self._match(TokenType.USING) and self._parse_csv(
        lambda: self._parse_alias(self._parse_function())
    )
    group = self._parse_group()
    return self.expression(
        exp.Pivot, this=this, expressions=expressions, using=using, group=group
    )
def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
    """Parse the `FOR <col> IN (...)` part of a PIVOT clause."""

    def _parse_aliased_expression() -> t.Optional[exp.Expression]:
        this = self._parse_select_or_expression()

        self._match(TokenType.ALIAS)
        alias = self._parse_bitwise()
        if alias:
            # A bare column alias is reduced to its identifier.
            if isinstance(alias, exp.Column) and not alias.db:
                alias = alias.this
            return self.expression(exp.PivotAlias, this=this, alias=alias)

        return this

    value = self._parse_column()

    if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
        self.raise_error("Expecting IN (")

    if self._match(TokenType.ANY):
        exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
    else:
        exprs = self._parse_csv(_parse_aliased_expression)

    self._match_r_paren()
    return self.expression(exp.In, this=value, expressions=exprs)

def _parse_pivot(self) -> t.Optional[exp.Pivot]:
    """Parse a full PIVOT/UNPIVOT clause; returns None (cursor restored)
    when the tokens do not form one."""
    index = self._index
    include_nulls = None

    if self._match(TokenType.PIVOT):
        unpivot = False
    elif self._match(TokenType.UNPIVOT):
        unpivot = True

        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
        if self._match_text_seq("INCLUDE", "NULLS"):
            include_nulls = True
        elif self._match_text_seq("EXCLUDE", "NULLS"):
            include_nulls = False
    else:
        return None

    expressions = []

    if not self._match(TokenType.L_PAREN):
        self._retreat(index)
        return None

    if unpivot:
        expressions = self._parse_csv(self._parse_column)
    else:
        expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

    if not expressions:
        self.raise_error("Failed to parse PIVOT's aggregation list")

    if not self._match(TokenType.FOR):
        self.raise_error("Expecting FOR")

    field = self._parse_pivot_in()
    default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
        self._parse_bitwise
    )

    self._match_r_paren()

    pivot = self.expression(
        exp.Pivot,
        expressions=expressions,
        field=field,
        unpivot=unpivot,
        include_nulls=include_nulls,
        default_on_null=default_on_null,
    )

    # Only the last pivot in a chain may carry an alias.
    if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
        pivot.set("alias", self._parse_table_alias())

    if not unpivot:
        # Precompute the output column names from the aggregation aliases and
        # the IN-list values.
        names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

        columns: t.List[exp.Expression] = []
        for fld in pivot.args["field"].expressions:
            field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
            for name in names:
                if self.PREFIXED_PIVOT_COLUMNS:
                    name = f"{name}_{field_name}" if name else field_name
                else:
                    name = f"{field_name}_{name}" if name else field_name

                columns.append(exp.to_identifier(name))

        pivot.set("columns", columns)

    return pivot
self.expression( 3962 exp.Pivot, 3963 expressions=expressions, 3964 field=field, 3965 unpivot=unpivot, 3966 include_nulls=include_nulls, 3967 default_on_null=default_on_null, 3968 ) 3969 3970 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3971 pivot.set("alias", self._parse_table_alias()) 3972 3973 if not unpivot: 3974 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3975 3976 columns: t.List[exp.Expression] = [] 3977 for fld in pivot.args["field"].expressions: 3978 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3979 for name in names: 3980 if self.PREFIXED_PIVOT_COLUMNS: 3981 name = f"{name}_{field_name}" if name else field_name 3982 else: 3983 name = f"{field_name}_{name}" if name else field_name 3984 3985 columns.append(exp.to_identifier(name)) 3986 3987 pivot.set("columns", columns) 3988 3989 return pivot 3990 3991 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3992 return [agg.alias for agg in aggregations] 3993 3994 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3995 if not skip_where_token and not self._match(TokenType.PREWHERE): 3996 return None 3997 3998 return self.expression( 3999 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4000 ) 4001 4002 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4003 if not skip_where_token and not self._match(TokenType.WHERE): 4004 return None 4005 4006 return self.expression( 4007 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4008 ) 4009 4010 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4011 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4012 return None 4013 4014 elements: t.Dict[str, t.Any] = defaultdict(list) 4015 4016 if self._match(TokenType.ALL): 4017 elements["all"] = True 4018 elif 
self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            # Plain grouping expressions; stop early at CUBE/ROLLUP so they are
            # handled by the dedicated branches below
            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # A dangling WITH (consumed at most one token past it) doesn't belong to
            # this clause, so give it back and stop
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            # No progress this iteration -> nothing more to consume
            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        """Build a Cube/Rollup node; WITH-prefixed forms (e.g. WITH ROLLUP) take no arg list."""
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, if present."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, if present."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse hierarchical-query clauses: [START WITH ...] CONNECT BY [NOCYCLE] ...

        START WITH may appear either before or after CONNECT BY.
        """
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only a function-like keyword inside CONNECT BY, so register a
        # temporary parser for it and remove it again right after
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `<name> AS <expr>` (alias-first form, e.g. in INTERPOLATE lists)."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE (...) list (used with ORDER BY ... WITH FILL), if present."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY / ORDER SIBLINGS BY; returns `this` unchanged when neither is present."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ordering term: expression [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...]."""
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # NOTE(review): `(asc and False)` is always False, so desc reduces to the DESC
        # match alone; kept as-is to preserve exact token consumption
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering isn't explicit, derive it from the dialect's default
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP (or FETCH FIRST ... ROWS ONLY); returns `this` unchanged if absent."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = 
self._match(TokenType.L_PAREN)
                # TOP (expr) allows a full term; bare TOP only takes a number
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style LIMIT <offset>, <count>
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <n> [ROW|ROWS]; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse-style LIMIT ... BY <exprs>, if present."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes NOWAIT (True), WAIT <n> (expression), SKIP LOCKED (False)
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operations, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                # Fall back to the dialect default; None means the keyword is mandatory
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Move modifiers (e.g. ORDER BY/LIMIT) off the right operand and onto
                # the set operation itself, per the dialect's attachment rules
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a projection-level expression, including an optional alias."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse assignment-precedence expressions (e.g. `x := <expr>`), right-associatively."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            # A single-part column on the LHS collapses to its identifier
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        """Parse OR-precedence expressions."""
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND-precedence expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-precedence expressions (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-precedence expressions (<, >, <=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN/IN/LIKE (via RANGE_PARSERS), ISNULL/NOTNULL, IS."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL desugars to NOT (x IS NULL)
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Wrap a range predicate in NOT; dialects may override for special negation forms."""
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / JSON ... / literal or NULL."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            # IS JSON [VALUE|ARRAY|OBJECT|SCALAR] [WITH|WITHOUT UNIQUE [KEYS]]
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                # Not an IS predicate after all; back off to before IS
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse an IN predicate: IN UNNEST(...), IN (<exprs or subquery>), IN [..], or IN <field>."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single query inside the parens is an IN-subquery, not a value list
            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the tail of a BETWEEN predicate: <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (for LIKE-style predicates)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing to INTERVAL '<n>' <unit> form.

        Chained intervals (INTERVAL '1' day '2' hour / with +) become a sum of Intervals.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out on a bare `IS` column, which indicates this wasn't an interval at all
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-precedence operators, || concat, ?? coalescing, and << / >> shifts."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` is null-coalescing
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, 
expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-precedence (TERM) operators, left-associatively."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-precedence (FACTOR) operators, left-associatively."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-form DIV with no RHS was actually an identifier, not the operator
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                # Record dialect division semantics so generators can transpile correctly
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation operators (only for dialects that define EXPONENT)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, then fall through to typed expressions / AT TIME ZONE."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal cast (e.g. DATE 'x'), or fall back to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # Typed literal, e.g. DATE '2020-01-01'; dialects may supply custom builders
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one data-type parameter (e.g. the 38 in DECIMAL(38, 0))."""
        this = self._parse_type()
        if not this:
            return None

        # A bare column name used as a type param (e.g. VARCHAR(MAX)) becomes an upper-cased var
        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type; None and retreat on failure."""
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier: it may actually be a type keyword in disguise
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Consume dotted UDT names, e.g. schema.my_type
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if 
type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                # Nullable(T) unwraps to T with the nullable flag set
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(func, types...): first arg is the function
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized arg list could also have been a function call; verified below
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # BigQuery-style inline constructor values: ARRAY<INT>[1, 2] / STRUCT<...>(...)
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            # A trailing string means this was a function call (e.g. DATE('...')), not a type
            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: `<name> [:] <type>` (or a bare type when allowed)."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
Without this, the list will be parsed as a type and we'll eventually crash 4919 this = self._parse_id_var() 4920 else: 4921 this = ( 4922 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4923 or self._parse_id_var() 4924 ) 4925 4926 self._match(TokenType.COLON) 4927 4928 if ( 4929 type_required 4930 and not isinstance(this, exp.DataType) 4931 and not self._match_set(self.TYPE_TOKENS, advance=False) 4932 ): 4933 self._retreat(index) 4934 return self._parse_types() 4935 4936 return self._parse_column_def(this) 4937 4938 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4939 if not self._match_text_seq("AT", "TIME", "ZONE"): 4940 return this 4941 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4942 4943 def _parse_column(self) -> t.Optional[exp.Expression]: 4944 this = self._parse_column_reference() 4945 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4946 4947 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4948 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4949 4950 return column 4951 4952 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4953 this = self._parse_field() 4954 if ( 4955 not this 4956 and self._match(TokenType.VALUES, advance=False) 4957 and self.VALUES_FOLLOWED_BY_PAREN 4958 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4959 ): 4960 this = self._parse_id_var() 4961 4962 if isinstance(this, exp.Identifier): 4963 # We bubble up comments from the Identifier to the Column 4964 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4965 4966 return this 4967 4968 def _parse_colon_as_variant_extract( 4969 self, this: t.Optional[exp.Expression] 4970 ) -> t.Optional[exp.Expression]: 4971 casts = [] 4972 json_path = [] 4973 escape = None 4974 4975 while self._match(TokenType.COLON): 4976 start_index = self._index 4977 4978 # Snowflake allows reserved keywords as 
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (brackets, dots, ``::`` casts and any
        dialect-specific COLUMN_OPERATORS) to ``this``, folding the result left
        to right. May finish with a variant extract when COLON_IS_VARIANT_EXTRACT.
        """
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast - the right-hand side must parse as a type
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                # dialect-specific operator builds the combined node itself
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # a.b.c - shift the existing parts into table/db/catalog slots
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this
self._prev_comments 5098 query = self._parse_select() 5099 5100 if query: 5101 expressions = [query] 5102 else: 5103 expressions = self._parse_expressions() 5104 5105 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5106 5107 if not this and self._match(TokenType.R_PAREN, advance=False): 5108 this = self.expression(exp.Tuple) 5109 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5110 this = self._parse_subquery(this=this, parse_alias=False) 5111 elif isinstance(this, exp.Subquery): 5112 this = self._parse_subquery( 5113 this=self._parse_set_operations(this), parse_alias=False 5114 ) 5115 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5116 this = self.expression(exp.Tuple, expressions=expressions) 5117 else: 5118 this = self.expression(exp.Paren, this=this) 5119 5120 if this: 5121 this.add_comments(comments) 5122 5123 self._match_r_paren(expression=this) 5124 return this 5125 5126 return None 5127 5128 def _parse_field( 5129 self, 5130 any_token: bool = False, 5131 tokens: t.Optional[t.Collection[TokenType]] = None, 5132 anonymous_func: bool = False, 5133 ) -> t.Optional[exp.Expression]: 5134 if anonymous_func: 5135 field = ( 5136 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5137 or self._parse_primary() 5138 ) 5139 else: 5140 field = self._parse_primary() or self._parse_function( 5141 anonymous=anonymous_func, any_token=any_token 5142 ) 5143 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5144 5145 def _parse_function( 5146 self, 5147 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5148 anonymous: bool = False, 5149 optional_parens: bool = True, 5150 any_token: bool = False, 5151 ) -> t.Optional[exp.Expression]: 5152 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5153 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5154 fn_syntax = False 5155 if ( 5156 self._match(TokenType.L_BRACE, advance=False) 5157 and self._next 5158 
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation at the current token, or return None.

        Resolution order: no-paren function parsers, no-paren builtins, then
        parenthesized calls via FUNCTION_PARSERS / FUNCTIONS, falling back to
        an Anonymous function node. The result is passed through
        ``_parse_window`` to pick up a trailing OVER clause.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # Not followed by "(": only no-paren builtins can still match
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Builders may optionally accept the active dialect
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original (unnormalized) spelling of the name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
self._match(TokenType.L_PAREN): 5291 return this 5292 5293 expressions = self._parse_csv(self._parse_function_parameter) 5294 self._match_r_paren() 5295 return self.expression( 5296 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5297 ) 5298 5299 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5300 literal = self._parse_primary() 5301 if literal: 5302 return self.expression(exp.Introducer, this=token.text, expression=literal) 5303 5304 return self.expression(exp.Identifier, this=token.text) 5305 5306 def _parse_session_parameter(self) -> exp.SessionParameter: 5307 kind = None 5308 this = self._parse_id_var() or self._parse_primary() 5309 5310 if this and self._match(TokenType.DOT): 5311 kind = this.name 5312 this = self._parse_var() or self._parse_primary() 5313 5314 return self.expression(exp.SessionParameter, this=this, kind=kind) 5315 5316 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5317 return self._parse_id_var() 5318 5319 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5320 index = self._index 5321 5322 if self._match(TokenType.L_PAREN): 5323 expressions = t.cast( 5324 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5325 ) 5326 5327 if not self._match(TokenType.R_PAREN): 5328 self._retreat(index) 5329 else: 5330 expressions = [self._parse_lambda_arg()] 5331 5332 if self._match_set(self.LAMBDAS): 5333 return self.LAMBDAS[self._prev.token_type](self, expressions) 5334 5335 self._retreat(index) 5336 5337 this: t.Optional[exp.Expression] 5338 5339 if self._match(TokenType.DISTINCT): 5340 this = self.expression( 5341 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5342 ) 5343 else: 5344 this = self._parse_select_or_expression(alias=alias) 5345 5346 return self._parse_limit( 5347 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5348 ) 5349 5350 def _parse_schema(self, this: 
    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional type and trailing constraints after a column name,
        returning a ColumnDef - or ``this`` unchanged when neither a type nor
        any constraint was found.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column: `AS expr` (untyped) or ALIAS/MATERIALIZED keyword
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            # Typed column with a transform: `<type> AS (expr)`
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_field()),
                )
            )

        # Greedily collect any remaining column constraints
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5458 5459 self._match(TokenType.ALIAS) 5460 5461 if self._match_text_seq("ROW"): 5462 start = self._match_text_seq("START") 5463 if not start: 5464 self._match(TokenType.END) 5465 hidden = self._match_text_seq("HIDDEN") 5466 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5467 5468 identity = self._match_text_seq("IDENTITY") 5469 5470 if self._match(TokenType.L_PAREN): 5471 if self._match(TokenType.START_WITH): 5472 this.set("start", self._parse_bitwise()) 5473 if self._match_text_seq("INCREMENT", "BY"): 5474 this.set("increment", self._parse_bitwise()) 5475 if self._match_text_seq("MINVALUE"): 5476 this.set("minvalue", self._parse_bitwise()) 5477 if self._match_text_seq("MAXVALUE"): 5478 this.set("maxvalue", self._parse_bitwise()) 5479 5480 if self._match_text_seq("CYCLE"): 5481 this.set("cycle", True) 5482 elif self._match_text_seq("NO", "CYCLE"): 5483 this.set("cycle", False) 5484 5485 if not identity: 5486 this.set("expression", self._parse_range()) 5487 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5488 args = self._parse_csv(self._parse_bitwise) 5489 this.set("start", seq_get(args, 0)) 5490 this.set("increment", seq_get(args, 1)) 5491 5492 self._match_r_paren() 5493 5494 return this 5495 5496 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5497 self._match_text_seq("LENGTH") 5498 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5499 5500 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5501 if self._match_text_seq("NULL"): 5502 return self.expression(exp.NotNullColumnConstraint) 5503 if self._match_text_seq("CASESPECIFIC"): 5504 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5505 if self._match_text_seq("FOR", "REPLICATION"): 5506 return self.expression(exp.NotForReplicationColumnConstraint) 5507 5508 # Unconsume the `NOT` token 5509 
self._retreat(self._index - 1) 5510 return None 5511 5512 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5513 if self._match(TokenType.CONSTRAINT): 5514 this = self._parse_id_var() 5515 else: 5516 this = None 5517 5518 if self._match_texts(self.CONSTRAINT_PARSERS): 5519 return self.expression( 5520 exp.ColumnConstraint, 5521 this=this, 5522 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5523 ) 5524 5525 return this 5526 5527 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5528 if not self._match(TokenType.CONSTRAINT): 5529 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5530 5531 return self.expression( 5532 exp.Constraint, 5533 this=self._parse_id_var(), 5534 expressions=self._parse_unnamed_constraints(), 5535 ) 5536 5537 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5538 constraints = [] 5539 while True: 5540 constraint = self._parse_unnamed_constraint() or self._parse_function() 5541 if not constraint: 5542 break 5543 constraints.append(constraint) 5544 5545 return constraints 5546 5547 def _parse_unnamed_constraint( 5548 self, constraints: t.Optional[t.Collection[str]] = None 5549 ) -> t.Optional[exp.Expression]: 5550 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5551 constraints or self.CONSTRAINT_PARSERS 5552 ): 5553 return None 5554 5555 constraint = self._prev.text.upper() 5556 if constraint not in self.CONSTRAINT_PARSERS: 5557 self.raise_error(f"No parser found for schema constraint {constraint}.") 5558 5559 return self.CONSTRAINT_PARSERS[constraint](self) 5560 5561 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5562 return self._parse_id_var(any_token=False) 5563 5564 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5565 self._match_text_seq("KEY") 5566 return self.expression( 5567 exp.UniqueColumnConstraint, 5568 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5569 
this=self._parse_schema(self._parse_unique_key()), 5570 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5571 on_conflict=self._parse_on_conflict(), 5572 ) 5573 5574 def _parse_key_constraint_options(self) -> t.List[str]: 5575 options = [] 5576 while True: 5577 if not self._curr: 5578 break 5579 5580 if self._match(TokenType.ON): 5581 action = None 5582 on = self._advance_any() and self._prev.text 5583 5584 if self._match_text_seq("NO", "ACTION"): 5585 action = "NO ACTION" 5586 elif self._match_text_seq("CASCADE"): 5587 action = "CASCADE" 5588 elif self._match_text_seq("RESTRICT"): 5589 action = "RESTRICT" 5590 elif self._match_pair(TokenType.SET, TokenType.NULL): 5591 action = "SET NULL" 5592 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5593 action = "SET DEFAULT" 5594 else: 5595 self.raise_error("Invalid key constraint") 5596 5597 options.append(f"ON {on} {action}") 5598 else: 5599 var = self._parse_var_from_options( 5600 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5601 ) 5602 if not var: 5603 break 5604 options.append(var.name) 5605 5606 return options 5607 5608 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5609 if match and not self._match(TokenType.REFERENCES): 5610 return None 5611 5612 expressions = None 5613 this = self._parse_table(schema=True) 5614 options = self._parse_key_constraint_options() 5615 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5616 5617 def _parse_foreign_key(self) -> exp.ForeignKey: 5618 expressions = self._parse_wrapped_id_vars() 5619 reference = self._parse_references() 5620 options = {} 5621 5622 while self._match(TokenType.ON): 5623 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5624 self.raise_error("Expected DELETE or UPDATE") 5625 5626 kind = self._prev.text.lower() 5627 5628 if self._match_text_seq("NO", "ACTION"): 5629 action = "NO ACTION" 5630 elif self._match(TokenType.SET): 5631 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5632 action = "SET " + self._prev.text.upper() 5633 else: 5634 self._advance() 5635 action = self._prev.text.upper() 5636 5637 options[kind] = action 5638 5639 return self.expression( 5640 exp.ForeignKey, 5641 expressions=expressions, 5642 reference=reference, 5643 **options, # type: ignore 5644 ) 5645 5646 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5647 return self._parse_field() 5648 5649 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5650 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5651 self._retreat(self._index - 1) 5652 return None 5653 5654 id_vars = self._parse_wrapped_id_vars() 5655 return self.expression( 5656 exp.PeriodForSystemTimeConstraint, 5657 this=seq_get(id_vars, 0), 5658 expression=seq_get(id_vars, 1), 5659 ) 5660 5661 def _parse_primary_key( 5662 self, wrapped_optional: bool = False, in_props: bool = False 5663 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5664 desc = ( 5665 self._match_set((TokenType.ASC, TokenType.DESC)) 5666 and self._prev.token_type == TokenType.DESC 5667 ) 5668 5669 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5670 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5671 5672 expressions = self._parse_wrapped_csv( 5673 self._parse_primary_key_part, optional=wrapped_optional 5674 ) 5675 options = self._parse_key_constraint_options() 5676 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5677 5678 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5679 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5680 5681 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5682 """ 5683 Parses a datetime column in ODBC format. 
    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a ``[...]`` / ``{...}`` suffix or literal: ODBC datetime
        literals, struct literals, array constructors, or a subscript (Bracket)
        on ``this``. Recurses to consume chained brackets.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            # e.g. {d 'yyyy-mm-dd'} - handled separately
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            # Bare bracket with nothing preceding it: an array literal
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Otherwise it's a subscript; adjust indices for the dialect offset
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)
    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]).

        A comma instead of AS yields CastToStrType. A FORMAT clause on a
        temporal target type is rewritten into StrToDate/StrToTime using the
        dialect's time-format mappings.
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # e.g. CAST(expr, 'type string') form
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            # The format may carry an AT TIME ZONE suffix
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Temporal cast with FORMAT becomes a string-to-date/time call
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # User-defined type name
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments: `<expr> USING <charset>` or `<expr>, <type>` into a (Try)Cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            # First variant: DECODE(bin, charset)
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values must compare with IS NULL, not equality
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR both sides being NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a `[KEY] <key> <separator> [VALUE] <value>` pair into exp.JSONKeyValue."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when a FORMAT JSON clause follows; otherwise return it unchanged."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g.
JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            # Not a DEFAULT ... ON clause after all; rewind
            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments: key-value pairs (or `*`),
        NULL handling, [WITH|WITHOUT] UNIQUE [KEYS], RETURNING and ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS(...) clause,
        including `NESTED [PATH ...] COLUMNS (...)` definitions."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a [COLUMNS] (<column definitions>) clause into exp.JSONSchema."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<expr> [FORMAT JSON] [, <path>] [ON ERROR/EMPTY ...] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and
self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse `MATCH (<columns>) AGAINST (<expr> [<search modifier>])`."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(<expr> [, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One `<name> <type> [<path>] [AS JSON]` entry of the WITH clause
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments: `<substr> IN <str>` or a comma-separated
        list; `haystack_first` flips the argument order of the comma form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT arguments: `MODEL <table>, TABLE <table> [, <params struct>]`."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Build an exp.JoinHint from the uppercased hint name and its table arguments."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM: the start position defaults to 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([<position>] [<chars> FROM] <expr> [COLLATE <collation>])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or TRIM_PATTERN_FIRST dialects), the first operand is the
            # pattern and the second is the target, so swap them below
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause as a CSV of named window definitions, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse a single `<name> AS (<window spec>)` definition."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `HAVING MAX|MIN <column>` into exp.HavingMax (max=False for MIN)."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes after `this`: FILTER (...), WITHIN GROUP (...),
        IGNORE/RESPECT NULLS, and the OVER (...) specification (or a named window
        definition when alias=True)."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper from inside the aggregate
                # to around it, so both syntaxes produce the same tree
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: reference to a named window, no inline spec
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one side of a window frame: UNBOUNDED / CURRENT ROW / an expression,
        plus an optional side keyword from WINDOW_SIDES."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional [AS] alias (or parenthesized alias list) after `this`.

        With explicit=True, an alias is only parsed when the AS keyword is present.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or any token usable as one (per `tokens` / ID_VAR_TOKENS)."""
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string token via STRING_PARSERS, falling back to a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and convert it into a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric token via NUMERIC_PARSERS, falling back to a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/extra tokens when requested) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved (or always, if ignore_reserved)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal, or failing that any token as an exp.Var."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, or failing that any token as an exp.Var."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL token, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE token, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter name (identifier, primary, or var) into exp.Parameter."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token via PLACEHOLDER_PARSERS; rewinds if the parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star modifier like EXCEPT/REPLACE: `<keyword> (<exprs>)` or a single expression."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items produced by `parse_method`, skipping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a run of binary operators: map each matched token to its
        expression class and chain the operands produced by `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized CSV of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized, `sep`-separated list; parentheses optional if `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; raise if they are missing and not optional."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, or failing that an (optionally aliased) expression with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CTAS), with set operations and query modifiers."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse `BEGIN/START [<kind>] [TRANSACTION|WORK] [<modes>...]`."""
        this = None
        if 
self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens, e.g. "READ ONLY"
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] <name>] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <name or string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one `ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER <col>]` action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action inside ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partitions of an ALTER TABLE ... DROP [IF EXISTS] PARTITION action."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD: constraints, columns, or a schema."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ALTER [COLUMN] action via ALTER_ALTER_PARSERS,
        defaulting to column alterations (DROP/SET DEFAULT, COMMENT, NOT NULL, type change)."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse ALTER ... DISTSTYLE: ALL/EVEN/AUTO, or KEY DISTKEY <column>."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse ALTER ... [COMPOUND] SORTKEY: either a column list or AUTO/NONE."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... DROP: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop; rewind and parse column drops instead
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME [COLUMN <old> TO <new> | TO <table>]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many ALTER TABLE ... SET variants (properties, location,
        tablespace, file format, tags, SERDE, etc.) into exp.AlterSet."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement; falls back to a generic exp.Command when the
        target is not alterable or the actions cannot be fully parsed."""
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only build an exp.Alter when the whole statement was consumed
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] <target> USING <source> ON <condition> WHEN ... [RETURNING ...]."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED clauses of a MERGE statement."""
        whens = []

        while 
self._match(TokenType.WHEN): 6856 matched = not self._match(TokenType.NOT) 6857 self._match_text_seq("MATCHED") 6858 source = ( 6859 False 6860 if self._match_text_seq("BY", "TARGET") 6861 else self._match_text_seq("BY", "SOURCE") 6862 ) 6863 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6864 6865 self._match(TokenType.THEN) 6866 6867 if self._match(TokenType.INSERT): 6868 this = self._parse_star() 6869 if this: 6870 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6871 else: 6872 then = self.expression( 6873 exp.Insert, 6874 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6875 expression=self._match_text_seq("VALUES") and self._parse_value(), 6876 ) 6877 elif self._match(TokenType.UPDATE): 6878 expressions = self._parse_star() 6879 if expressions: 6880 then = self.expression(exp.Update, expressions=expressions) 6881 else: 6882 then = self.expression( 6883 exp.Update, 6884 expressions=self._match(TokenType.SET) 6885 and self._parse_csv(self._parse_equality), 6886 ) 6887 elif self._match(TokenType.DELETE): 6888 then = self.expression(exp.Var, this=self._prev.text) 6889 else: 6890 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6891 6892 whens.append( 6893 self.expression( 6894 exp.When, 6895 matched=matched, 6896 source=source, 6897 condition=condition, 6898 then=then, 6899 ) 6900 ) 6901 return whens 6902 6903 def _parse_show(self) -> t.Optional[exp.Expression]: 6904 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6905 if parser: 6906 return parser(self) 6907 return self._parse_as_command(self._prev) 6908 6909 def _parse_set_item_assignment( 6910 self, kind: t.Optional[str] = None 6911 ) -> t.Optional[exp.Expression]: 6912 index = self._index 6913 6914 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6915 return self._parse_set_transaction(global_=kind == "GLOBAL") 6916 6917 left = self._parse_primary() or self._parse_column() 6918 
assignment_delimiter = self._match_texts(("=", "TO")) 6919 6920 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6921 self._retreat(index) 6922 return None 6923 6924 right = self._parse_statement() or self._parse_id_var() 6925 if isinstance(right, (exp.Column, exp.Identifier)): 6926 right = exp.var(right.name) 6927 6928 this = self.expression(exp.EQ, this=left, expression=right) 6929 return self.expression(exp.SetItem, this=this, kind=kind) 6930 6931 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6932 self._match_text_seq("TRANSACTION") 6933 characteristics = self._parse_csv( 6934 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6935 ) 6936 return self.expression( 6937 exp.SetItem, 6938 expressions=characteristics, 6939 kind="TRANSACTION", 6940 **{"global": global_}, # type: ignore 6941 ) 6942 6943 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6944 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6945 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6946 6947 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6948 index = self._index 6949 set_ = self.expression( 6950 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6951 ) 6952 6953 if self._curr: 6954 self._retreat(index) 6955 return self._parse_as_command(self._prev) 6956 6957 return set_ 6958 6959 def _parse_var_from_options( 6960 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6961 ) -> t.Optional[exp.Var]: 6962 start = self._curr 6963 if not start: 6964 return None 6965 6966 option = start.text.upper() 6967 continuations = options.get(option) 6968 6969 index = self._index 6970 self._advance() 6971 for keywords in continuations or []: 6972 if isinstance(keywords, str): 6973 keywords = (keywords,) 6974 6975 if self._match_text_seq(*keywords): 6976 option = f"{option} {' '.join(keywords)}" 6977 break 
6978 else: 6979 if continuations or continuations is None: 6980 if raise_unmatched: 6981 self.raise_error(f"Unknown option {option}") 6982 6983 self._retreat(index) 6984 return None 6985 6986 return exp.var(option) 6987 6988 def _parse_as_command(self, start: Token) -> exp.Command: 6989 while self._curr: 6990 self._advance() 6991 text = self._find_sql(start, self._prev) 6992 size = len(start.text) 6993 self._warn_unsupported() 6994 return exp.Command(this=text[:size], expression=text[size:]) 6995 6996 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6997 settings = [] 6998 6999 self._match_l_paren() 7000 kind = self._parse_id_var() 7001 7002 if self._match(TokenType.L_PAREN): 7003 while True: 7004 key = self._parse_id_var() 7005 value = self._parse_primary() 7006 7007 if not key and value is None: 7008 break 7009 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7010 self._match(TokenType.R_PAREN) 7011 7012 self._match_r_paren() 7013 7014 return self.expression( 7015 exp.DictProperty, 7016 this=this, 7017 kind=kind.this if kind else None, 7018 settings=settings, 7019 ) 7020 7021 def _parse_dict_range(self, this: str) -> exp.DictRange: 7022 self._match_l_paren() 7023 has_min = self._match_text_seq("MIN") 7024 if has_min: 7025 min = self._parse_var() or self._parse_primary() 7026 self._match_text_seq("MAX") 7027 max = self._parse_var() or self._parse_primary() 7028 else: 7029 max = self._parse_var() or self._parse_primary() 7030 min = exp.Literal.number(0) 7031 self._match_r_paren() 7032 return self.expression(exp.DictRange, this=this, min=min, max=max) 7033 7034 def _parse_comprehension( 7035 self, this: t.Optional[exp.Expression] 7036 ) -> t.Optional[exp.Comprehension]: 7037 index = self._index 7038 expression = self._parse_column() 7039 if not self._match(TokenType.IN): 7040 self._retreat(index - 1) 7041 return None 7042 iterator = self._parse_column() 7043 condition = self._parse_assignment() if self._match_text_seq("IF") 
else None 7044 return self.expression( 7045 exp.Comprehension, 7046 this=this, 7047 expression=expression, 7048 iterator=iterator, 7049 condition=condition, 7050 ) 7051 7052 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7053 if self._match(TokenType.HEREDOC_STRING): 7054 return self.expression(exp.Heredoc, this=self._prev.text) 7055 7056 if not self._match_text_seq("$"): 7057 return None 7058 7059 tags = ["$"] 7060 tag_text = None 7061 7062 if self._is_connected(): 7063 self._advance() 7064 tags.append(self._prev.text.upper()) 7065 else: 7066 self.raise_error("No closing $ found") 7067 7068 if tags[-1] != "$": 7069 if self._is_connected() and self._match_text_seq("$"): 7070 tag_text = tags[-1] 7071 tags.append("$") 7072 else: 7073 self.raise_error("No closing $ found") 7074 7075 heredoc_start = self._curr 7076 7077 while self._curr: 7078 if self._match_text_seq(*tags, advance=False): 7079 this = self._find_sql(heredoc_start, self._prev) 7080 self._advance(len(tags)) 7081 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7082 7083 self._advance() 7084 7085 self.raise_error(f"No closing {''.join(tags)} found") 7086 return None 7087 7088 def _find_parser( 7089 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7090 ) -> t.Optional[t.Callable]: 7091 if not self._curr: 7092 return None 7093 7094 index = self._index 7095 this = [] 7096 while True: 7097 # The current token might be multiple words 7098 curr = self._curr.text.upper() 7099 key = curr.split(" ") 7100 this.append(curr) 7101 7102 self._advance() 7103 result, trie = in_trie(trie, key) 7104 if result == TrieResult.FAILED: 7105 break 7106 7107 if result == TrieResult.EXISTS: 7108 subparser = parsers[" ".join(this)] 7109 return subparser 7110 7111 self._retreat(index) 7112 return None 7113 7114 def _match(self, token_type, advance=True, expression=None): 7115 if not self._curr: 7116 return None 7117 7118 if self._curr.token_type == token_type: 7119 if advance: 7120 self._advance() 7121 
self._add_comments(expression) 7122 return True 7123 7124 return None 7125 7126 def _match_set(self, types, advance=True): 7127 if not self._curr: 7128 return None 7129 7130 if self._curr.token_type in types: 7131 if advance: 7132 self._advance() 7133 return True 7134 7135 return None 7136 7137 def _match_pair(self, token_type_a, token_type_b, advance=True): 7138 if not self._curr or not self._next: 7139 return None 7140 7141 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7142 if advance: 7143 self._advance(2) 7144 return True 7145 7146 return None 7147 7148 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7149 if not self._match(TokenType.L_PAREN, expression=expression): 7150 self.raise_error("Expecting (") 7151 7152 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7153 if not self._match(TokenType.R_PAREN, expression=expression): 7154 self.raise_error("Expecting )") 7155 7156 def _match_texts(self, texts, advance=True): 7157 if ( 7158 self._curr 7159 and self._curr.token_type != TokenType.STRING 7160 and self._curr.text.upper() in texts 7161 ): 7162 if advance: 7163 self._advance() 7164 return True 7165 return None 7166 7167 def _match_text_seq(self, *texts, advance=True): 7168 index = self._index 7169 for text in texts: 7170 if ( 7171 self._curr 7172 and self._curr.token_type != TokenType.STRING 7173 and self._curr.text.upper() == text 7174 ): 7175 self._advance() 7176 else: 7177 self._retreat(index) 7178 return None 7179 7180 if not advance: 7181 self._retreat(index) 7182 7183 return True 7184 7185 def _replace_lambda( 7186 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7187 ) -> t.Optional[exp.Expression]: 7188 if not node: 7189 return node 7190 7191 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7192 7193 for column in node.find_all(exp.Column): 7194 typ = lambda_types.get(column.parts[0].name) 7195 if typ 
is not None: 7196 dot_or_id = column.to_dot() if column.table else column.this 7197 7198 if typ: 7199 dot_or_id = self.expression( 7200 exp.Cast, 7201 this=dot_or_id, 7202 to=typ, 7203 ) 7204 7205 parent = column.parent 7206 7207 while isinstance(parent, exp.Dot): 7208 if not isinstance(parent.parent, exp.Dot): 7209 parent.replace(dot_or_id) 7210 break 7211 parent = parent.parent 7212 else: 7213 if column is node: 7214 node = dot_or_id 7215 else: 7216 column.replace(dot_or_id) 7217 return node 7218 7219 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7220 start = self._prev 7221 7222 # Not to be confused with TRUNCATE(number, decimals) function call 7223 if self._match(TokenType.L_PAREN): 7224 self._retreat(self._index - 2) 7225 return self._parse_function() 7226 7227 # Clickhouse supports TRUNCATE DATABASE as well 7228 is_database = self._match(TokenType.DATABASE) 7229 7230 self._match(TokenType.TABLE) 7231 7232 exists = self._parse_exists(not_=False) 7233 7234 expressions = self._parse_csv( 7235 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7236 ) 7237 7238 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7239 7240 if self._match_text_seq("RESTART", "IDENTITY"): 7241 identity = "RESTART" 7242 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7243 identity = "CONTINUE" 7244 else: 7245 identity = None 7246 7247 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7248 option = self._prev.text 7249 else: 7250 option = None 7251 7252 partition = self._parse_partition() 7253 7254 # Fallback case 7255 if self._curr: 7256 return self._parse_as_command(start) 7257 7258 return self.expression( 7259 exp.TruncateTable, 7260 expressions=expressions, 7261 is_database=is_database, 7262 exists=exists, 7263 cluster=cluster, 7264 identity=identity, 7265 option=option, 7266 partition=partition, 7267 ) 7268 7269 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7270 
this = self._parse_ordered(self._parse_opclass) 7271 7272 if not self._match(TokenType.WITH): 7273 return this 7274 7275 op = self._parse_var(any_token=True) 7276 7277 return self.expression(exp.WithOperator, this=this, op=op) 7278 7279 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7280 self._match(TokenType.EQ) 7281 self._match(TokenType.L_PAREN) 7282 7283 opts: t.List[t.Optional[exp.Expression]] = [] 7284 while self._curr and not self._match(TokenType.R_PAREN): 7285 if self._match_text_seq("FORMAT_NAME", "="): 7286 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7287 # so we parse it separately to use _parse_field() 7288 prop = self.expression( 7289 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7290 ) 7291 opts.append(prop) 7292 else: 7293 opts.append(self._parse_property()) 7294 7295 self._match(TokenType.COMMA) 7296 7297 return opts 7298 7299 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7300 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7301 7302 options = [] 7303 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7304 option = self._parse_var(any_token=True) 7305 prev = self._prev.text.upper() 7306 7307 # Different dialects might separate options and values by white space, "=" and "AS" 7308 self._match(TokenType.EQ) 7309 self._match(TokenType.ALIAS) 7310 7311 param = self.expression(exp.CopyParameter, this=option) 7312 7313 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7314 TokenType.L_PAREN, advance=False 7315 ): 7316 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7317 param.set("expressions", self._parse_wrapped_options()) 7318 elif prev == "FILE_FORMAT": 7319 # T-SQL's external file format case 7320 param.set("expression", self._parse_field()) 7321 else: 7322 param.set("expression", self._parse_unquoted_field()) 7323 7324 options.append(param) 7325 self._match(sep) 7326 7327 return options 7328 7329 
    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse COPY credential clauses: STORAGE_INTEGRATION, CREDENTIALS, ENCRYPTION,
        IAM_ROLE and REGION. Always returns a Credentials node (possibly empty)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Hook point: dialects can override this to parse richer file-location syntax.
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse COPY [INTO] <target> FROM|TO <files> [credentials] [WITH (params)];
        falls back to a raw Command if tokens remain unconsumed."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM (or neither keyword), False for COPY ... TO.
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parse NORMALIZE(<string>[, <form>])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> exp.Star | exp.UnpackColumns:
        """Parse the projection star: COLUMNS(...) unpacking, or * with EXCEPT/REPLACE/RENAME."""
        if self._match_text_seq("COLUMNS", "(", advance=False):
            return exp.UnpackColumns(this=self._parse_function())

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from alternating key/value arguments.

    A single star argument produces a StarMap; otherwise arguments are paired
    up as (key, value) and wrapped in a VarMap.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    idx = 0
    while idx < len(args):
        keys.append(args[idx])
        # An odd-length argument list raises IndexError here, matching the
        # stride-2 pairing contract.
        values.append(args[idx + 1])
        idx += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser method for a binary range operator of type `expr_type`.

    The produced callable parses the right-hand side with `_parse_bitwise`,
    optionally flips operand order (`reverse_args`), and handles a trailing
    ESCAPE clause.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        rhs = self._parse_bitwise()
        left, right = (rhs, this) if reverse_args else (this, rhs)
        node = self.expression(expr_type, this=left, expression=right)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG/LN expression, honoring the dialect's argument order.

    With one argument, LOG maps to Ln or Log depending on the dialect's
    LOG_DEFAULTS_TO_LN setting; with two, operands are swapped unless the
    dialect puts the base first.
    """
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if not value:
        single_arg_cls = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return single_arg_cls(this=base)

    if not dialect.LOG_BASE_FIRST:
        base, value = value, base
    return exp.Log(this=base, expression=value)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for JSON-extraction functions of type `expr_type`.

    The builder converts the second argument into a dialect-specific JSON
    path; any extra arguments are attached only for exp.JSONExtract.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=seq_get(args, 0), expression=json_path)

        if len(args) > 2 and expr_type is exp.JSONExtract:
            node.set("expressions", args[2:])

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a modulo expression from a two-argument MOD(...) call."""

    def _parenthesize(node: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # MOD(a + 1, 7) renders as (a + 1) % 7, so binary operands need parens
        return exp.Paren(this=node) if isinstance(node, exp.Binary) else node

    return exp.Mod(
        this=_parenthesize(seq_get(args, 0)),
        expression=_parenthesize(seq_get(args, 1)),
    )
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array/list constructor node, recording bracket notation when
    the dialect distinguishes ARRAY(...) from [...] constructors."""
    node = exp_class(expressions=args)

    uses_brackets = bracket_kind == TokenType.L_BRACKET
    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        node.set("bracket_notation", uses_brackets)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE; a two-argument call uses `default_source_tz`
    (when given) as the implicit source time zone."""
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    implied_source = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=implied_source, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 217 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 218 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 219 "LIKE": build_like, 220 "LOG": build_logarithm, 221 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 222 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 223 "LOWER": build_lower, 224 "LPAD": lambda args: build_pad(args), 225 "LEFTPAD": lambda args: build_pad(args), 226 "LTRIM": lambda args: build_trim(args), 227 "MOD": build_mod, 228 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 229 "RPAD": lambda args: build_pad(args, is_left=False), 230 "RTRIM": lambda args: build_trim(args, is_left=False), 231 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 232 if len(args) != 2 233 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 234 "TIME_TO_TIME_STR": lambda args: exp.Cast( 235 this=seq_get(args, 0), 236 to=exp.DataType(this=exp.DataType.Type.TEXT), 237 ), 238 "TO_HEX": build_hex, 239 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 240 this=exp.Cast( 241 this=seq_get(args, 0), 242 to=exp.DataType(this=exp.DataType.Type.TEXT), 243 ), 244 start=exp.Literal.number(1), 245 length=exp.Literal.number(10), 246 ), 247 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 248 "UPPER": build_upper, 249 "VAR_MAP": build_var_map, 250 } 251 252 NO_PAREN_FUNCTIONS = { 253 TokenType.CURRENT_DATE: exp.CurrentDate, 254 TokenType.CURRENT_DATETIME: exp.CurrentDate, 255 TokenType.CURRENT_TIME: exp.CurrentTime, 256 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 257 TokenType.CURRENT_USER: exp.CurrentUser, 258 
} 259 260 STRUCT_TYPE_TOKENS = { 261 TokenType.NESTED, 262 TokenType.OBJECT, 263 TokenType.STRUCT, 264 TokenType.UNION, 265 } 266 267 NESTED_TYPE_TOKENS = { 268 TokenType.ARRAY, 269 TokenType.LIST, 270 TokenType.LOWCARDINALITY, 271 TokenType.MAP, 272 TokenType.NULLABLE, 273 *STRUCT_TYPE_TOKENS, 274 } 275 276 ENUM_TYPE_TOKENS = { 277 TokenType.ENUM, 278 TokenType.ENUM8, 279 TokenType.ENUM16, 280 } 281 282 AGGREGATE_TYPE_TOKENS = { 283 TokenType.AGGREGATEFUNCTION, 284 TokenType.SIMPLEAGGREGATEFUNCTION, 285 } 286 287 TYPE_TOKENS = { 288 TokenType.BIT, 289 TokenType.BOOLEAN, 290 TokenType.TINYINT, 291 TokenType.UTINYINT, 292 TokenType.SMALLINT, 293 TokenType.USMALLINT, 294 TokenType.INT, 295 TokenType.UINT, 296 TokenType.BIGINT, 297 TokenType.UBIGINT, 298 TokenType.INT128, 299 TokenType.UINT128, 300 TokenType.INT256, 301 TokenType.UINT256, 302 TokenType.MEDIUMINT, 303 TokenType.UMEDIUMINT, 304 TokenType.FIXEDSTRING, 305 TokenType.FLOAT, 306 TokenType.DOUBLE, 307 TokenType.CHAR, 308 TokenType.NCHAR, 309 TokenType.VARCHAR, 310 TokenType.NVARCHAR, 311 TokenType.BPCHAR, 312 TokenType.TEXT, 313 TokenType.MEDIUMTEXT, 314 TokenType.LONGTEXT, 315 TokenType.MEDIUMBLOB, 316 TokenType.LONGBLOB, 317 TokenType.BINARY, 318 TokenType.VARBINARY, 319 TokenType.JSON, 320 TokenType.JSONB, 321 TokenType.INTERVAL, 322 TokenType.TINYBLOB, 323 TokenType.TINYTEXT, 324 TokenType.TIME, 325 TokenType.TIMETZ, 326 TokenType.TIMESTAMP, 327 TokenType.TIMESTAMP_S, 328 TokenType.TIMESTAMP_MS, 329 TokenType.TIMESTAMP_NS, 330 TokenType.TIMESTAMPTZ, 331 TokenType.TIMESTAMPLTZ, 332 TokenType.TIMESTAMPNTZ, 333 TokenType.DATETIME, 334 TokenType.DATETIME64, 335 TokenType.DATE, 336 TokenType.DATE32, 337 TokenType.INT4RANGE, 338 TokenType.INT4MULTIRANGE, 339 TokenType.INT8RANGE, 340 TokenType.INT8MULTIRANGE, 341 TokenType.NUMRANGE, 342 TokenType.NUMMULTIRANGE, 343 TokenType.TSRANGE, 344 TokenType.TSMULTIRANGE, 345 TokenType.TSTZRANGE, 346 TokenType.TSTZMULTIRANGE, 347 TokenType.DATERANGE, 348 
TokenType.DATEMULTIRANGE, 349 TokenType.DECIMAL, 350 TokenType.DECIMAL32, 351 TokenType.DECIMAL64, 352 TokenType.DECIMAL128, 353 TokenType.UDECIMAL, 354 TokenType.BIGDECIMAL, 355 TokenType.UUID, 356 TokenType.GEOGRAPHY, 357 TokenType.GEOMETRY, 358 TokenType.HLLSKETCH, 359 TokenType.HSTORE, 360 TokenType.PSEUDO_TYPE, 361 TokenType.SUPER, 362 TokenType.SERIAL, 363 TokenType.SMALLSERIAL, 364 TokenType.BIGSERIAL, 365 TokenType.XML, 366 TokenType.YEAR, 367 TokenType.UNIQUEIDENTIFIER, 368 TokenType.USERDEFINED, 369 TokenType.MONEY, 370 TokenType.SMALLMONEY, 371 TokenType.ROWVERSION, 372 TokenType.IMAGE, 373 TokenType.VARIANT, 374 TokenType.VECTOR, 375 TokenType.OBJECT, 376 TokenType.OBJECT_IDENTIFIER, 377 TokenType.INET, 378 TokenType.IPADDRESS, 379 TokenType.IPPREFIX, 380 TokenType.IPV4, 381 TokenType.IPV6, 382 TokenType.UNKNOWN, 383 TokenType.NULL, 384 TokenType.NAME, 385 TokenType.TDIGEST, 386 *ENUM_TYPE_TOKENS, 387 *NESTED_TYPE_TOKENS, 388 *AGGREGATE_TYPE_TOKENS, 389 } 390 391 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 392 TokenType.BIGINT: TokenType.UBIGINT, 393 TokenType.INT: TokenType.UINT, 394 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 395 TokenType.SMALLINT: TokenType.USMALLINT, 396 TokenType.TINYINT: TokenType.UTINYINT, 397 TokenType.DECIMAL: TokenType.UDECIMAL, 398 } 399 400 SUBQUERY_PREDICATES = { 401 TokenType.ANY: exp.Any, 402 TokenType.ALL: exp.All, 403 TokenType.EXISTS: exp.Exists, 404 TokenType.SOME: exp.Any, 405 } 406 407 RESERVED_TOKENS = { 408 *Tokenizer.SINGLE_TOKENS.values(), 409 TokenType.SELECT, 410 } - {TokenType.IDENTIFIER} 411 412 DB_CREATABLES = { 413 TokenType.DATABASE, 414 TokenType.DICTIONARY, 415 TokenType.MODEL, 416 TokenType.SCHEMA, 417 TokenType.SEQUENCE, 418 TokenType.STORAGE_INTEGRATION, 419 TokenType.TABLE, 420 TokenType.TAG, 421 TokenType.VIEW, 422 TokenType.WAREHOUSE, 423 TokenType.STREAMLIT, 424 } 425 426 CREATABLES = { 427 TokenType.COLUMN, 428 TokenType.CONSTRAINT, 429 TokenType.FOREIGN_KEY, 430 TokenType.FUNCTION, 431 TokenType.INDEX, 
432 TokenType.PROCEDURE, 433 *DB_CREATABLES, 434 } 435 436 ALTERABLES = { 437 TokenType.INDEX, 438 TokenType.TABLE, 439 TokenType.VIEW, 440 } 441 442 # Tokens that can represent identifiers 443 ID_VAR_TOKENS = { 444 TokenType.ALL, 445 TokenType.VAR, 446 TokenType.ANTI, 447 TokenType.APPLY, 448 TokenType.ASC, 449 TokenType.ASOF, 450 TokenType.AUTO_INCREMENT, 451 TokenType.BEGIN, 452 TokenType.BPCHAR, 453 TokenType.CACHE, 454 TokenType.CASE, 455 TokenType.COLLATE, 456 TokenType.COMMAND, 457 TokenType.COMMENT, 458 TokenType.COMMIT, 459 TokenType.CONSTRAINT, 460 TokenType.COPY, 461 TokenType.CUBE, 462 TokenType.DEFAULT, 463 TokenType.DELETE, 464 TokenType.DESC, 465 TokenType.DESCRIBE, 466 TokenType.DICTIONARY, 467 TokenType.DIV, 468 TokenType.END, 469 TokenType.EXECUTE, 470 TokenType.ESCAPE, 471 TokenType.FALSE, 472 TokenType.FIRST, 473 TokenType.FILTER, 474 TokenType.FINAL, 475 TokenType.FORMAT, 476 TokenType.FULL, 477 TokenType.IDENTIFIER, 478 TokenType.IS, 479 TokenType.ISNULL, 480 TokenType.INTERVAL, 481 TokenType.KEEP, 482 TokenType.KILL, 483 TokenType.LEFT, 484 TokenType.LOAD, 485 TokenType.MERGE, 486 TokenType.NATURAL, 487 TokenType.NEXT, 488 TokenType.OFFSET, 489 TokenType.OPERATOR, 490 TokenType.ORDINALITY, 491 TokenType.OVERLAPS, 492 TokenType.OVERWRITE, 493 TokenType.PARTITION, 494 TokenType.PERCENT, 495 TokenType.PIVOT, 496 TokenType.PRAGMA, 497 TokenType.RANGE, 498 TokenType.RECURSIVE, 499 TokenType.REFERENCES, 500 TokenType.REFRESH, 501 TokenType.RENAME, 502 TokenType.REPLACE, 503 TokenType.RIGHT, 504 TokenType.ROLLUP, 505 TokenType.ROW, 506 TokenType.ROWS, 507 TokenType.SEMI, 508 TokenType.SET, 509 TokenType.SETTINGS, 510 TokenType.SHOW, 511 TokenType.TEMPORARY, 512 TokenType.TOP, 513 TokenType.TRUE, 514 TokenType.TRUNCATE, 515 TokenType.UNIQUE, 516 TokenType.UNNEST, 517 TokenType.UNPIVOT, 518 TokenType.UPDATE, 519 TokenType.USE, 520 TokenType.VOLATILE, 521 TokenType.WINDOW, 522 *CREATABLES, 523 *SUBQUERY_PREDICATES, 524 *TYPE_TOKENS, 525 
        *NO_PAREN_FUNCTIONS,
    }
    # UNION is a set-operation token (see SET_OPERATIONS), so it can't act as an identifier
    ID_VAR_TOKENS.remove(TokenType.UNION)

    # Tokens that may name the unit of an INTERVAL expression
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Tokens that can follow a table reference as its alias; join-related keywords are
    # excluded so they are not accidentally consumed as aliases
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    # Function names that build array-like values, mapped to their AST node classes
    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # Valid position specifiers in TRIM(<type> ... FROM ...)
    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may appear where a function name is expected
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary-operator tables: token -> expression class, one table per precedence level
    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    # Time-only types; TIMESTAMPS is the superset that also carries a date part
    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: `args -> body` (exp.Lambda) and `name => value` (exp.Kwarg)
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    # Operators that can follow a column expression; DOT's None entry means the
    # parser handles it inline rather than through a callback
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps target expression types to parser callbacks; used by `parse_into`
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Statement dispatch: leading token -> parser for the whole statement
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES,
            raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    # Prefix (unary) operators
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    # String-literal tokens -> literal expression builders
    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    # Numeric and binary-literal tokens -> literal expression builders
    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    # All primary (leaf) expressions: the literal tables above plus NULL/TRUE/FALSE/star/etc.
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    # Bind-parameter / placeholder syntaxes (?, @param, :name)
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Range/predicate operators (BETWEEN, IN, LIKE, IS, ...)
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # DDL property keyword -> parser callback
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs:
        self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC is normalized to the IMMUTABLE stability level
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self:
        self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> parser callback
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # ALTER TABLE action keyword -> parser callback
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    # Sub-actions of ALTER TABLE ... ALTER ...
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraints that may appear in a schema definition without a preceding name
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    # Function-like constructs whose arguments are not parenthesized (e.g. CASE ... END)
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Functions with bespoke argument grammars that need a dedicated parser
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Query-modifier token -> callback returning (modifier arg name, parsed node)
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere",
self._parse_prewhere()), 1122 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1123 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1124 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1125 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1126 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1127 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1128 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1129 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1130 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1131 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1132 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1133 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1134 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1135 TokenType.CLUSTER_BY: lambda self: ( 1136 "cluster", 1137 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1138 ), 1139 TokenType.DISTRIBUTE_BY: lambda self: ( 1140 "distribute", 1141 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1142 ), 1143 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1144 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1145 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1146 } 1147 1148 SET_PARSERS = { 1149 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1150 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1151 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1152 "TRANSACTION": lambda self: self._parse_set_transaction(), 1153 } 1154 1155 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1156 1157 TYPE_LITERAL_PARSERS = { 1158 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, 
this=this), 1159 } 1160 1161 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1162 1163 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1164 1165 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1166 1167 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1168 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1169 "ISOLATION": ( 1170 ("LEVEL", "REPEATABLE", "READ"), 1171 ("LEVEL", "READ", "COMMITTED"), 1172 ("LEVEL", "READ", "UNCOMITTED"), 1173 ("LEVEL", "SERIALIZABLE"), 1174 ), 1175 "READ": ("WRITE", "ONLY"), 1176 } 1177 1178 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1179 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1180 ) 1181 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1182 1183 CREATE_SEQUENCE: OPTIONS_TYPE = { 1184 "SCALE": ("EXTEND", "NOEXTEND"), 1185 "SHARD": ("EXTEND", "NOEXTEND"), 1186 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1187 **dict.fromkeys( 1188 ( 1189 "SESSION", 1190 "GLOBAL", 1191 "KEEP", 1192 "NOKEEP", 1193 "ORDER", 1194 "NOORDER", 1195 "NOCACHE", 1196 "CYCLE", 1197 "NOCYCLE", 1198 "NOMINVALUE", 1199 "NOMAXVALUE", 1200 "NOSCALE", 1201 "NOSHARD", 1202 ), 1203 tuple(), 1204 ), 1205 } 1206 1207 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1208 1209 USABLES: OPTIONS_TYPE = dict.fromkeys( 1210 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1211 ) 1212 1213 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1214 1215 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1216 "TYPE": ("EVOLUTION",), 1217 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1218 } 1219 1220 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1221 "NOT": ("ENFORCED",), 1222 "MATCH": ( 1223 "FULL", 1224 "PARTIAL", 1225 "SIMPLE", 1226 ), 1227 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1228 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1229 } 1230 1231 
    # Conflict actions accepted in INSERT OR <action> (SQLite-style)
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    # Time-travel clause keywords: AT/BEFORE/END + TIMESTAMP/OFFSET/STATEMENT/STREAM
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    # Window specification helpers: alias tokens, the tokens that may precede
    # a parenthesized window spec, and frame-boundary sides
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    # Tokens that can start an ALTER TABLE ... ADD constraint
    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # ODBC datetime escape prefixes -> expression classes, e.g. {d '2024-01-01'}
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    # Whether :: produces exp.Cast (strict) rather than exp.TryCast; see COLUMN_OPERATORS
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g.
    # SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced (defaults to ErrorLevel.IMMEDIATE,
                i.e. raise on the first error).
            error_message_context: Number of characters of source context shown around
                an error's location.
            max_errors: Maximum number of error messages concatenated into a raised
                ParseError.
            dialect: The dialect (name, class or instance) to parse with; resolved via
                Dialect.get_or_raise.
        """
        # Imported here to avoid a circular import at module load time
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all per-parse state so the same Parser instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this attempt was for, then try the next one
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and applies `parse_method` to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon that carries comments becomes its own single-token chunk,
                # so those comments are preserved in the output trees
                if token.comments:
                    chunks.append([token])

                # Start a new statement chunk unless this semicolon is the last token
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed -> syntax error
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[...]m sequences underline the offending SQL in ANSI-capable terminals
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
1517 kwargs: The arguments to set for the expression along with their respective values. 1518 1519 Returns: 1520 The target expression. 1521 """ 1522 instance = exp_class(**kwargs) 1523 instance.add_comments(comments) if comments else self._add_comments(instance) 1524 return self.validate_expression(instance) 1525 1526 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1527 if expression and self._prev_comments: 1528 expression.add_comments(self._prev_comments) 1529 self._prev_comments = None 1530 1531 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1532 """ 1533 Validates an Expression, making sure that all its mandatory arguments are set. 1534 1535 Args: 1536 expression: The expression to validate. 1537 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1538 1539 Returns: 1540 The validated expression. 1541 """ 1542 if self.error_level != ErrorLevel.IGNORE: 1543 for error_message in expression.error_messages(args): 1544 self.raise_error(error_message) 1545 1546 return expression 1547 1548 def _find_sql(self, start: Token, end: Token) -> str: 1549 return self.sql[start.start : end.end + 1] 1550 1551 def _is_connected(self) -> bool: 1552 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1553 1554 def _advance(self, times: int = 1) -> None: 1555 self._index += times 1556 self._curr = seq_get(self._tokens, self._index) 1557 self._next = seq_get(self._tokens, self._index + 1) 1558 1559 if self._index > 0: 1560 self._prev = self._tokens[self._index - 1] 1561 self._prev_comments = self._prev.comments 1562 else: 1563 self._prev = None 1564 self._prev_comments = None 1565 1566 def _retreat(self, index: int) -> None: 1567 if index != self._index: 1568 self._advance(index - self._index) 1569 1570 def _warn_unsupported(self) -> None: 1571 if len(self._tokens) <= 1: 1572 return 1573 1574 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1575 # interested in emitting a warning for the one being currently processed. 1576 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1577 1578 logger.warning( 1579 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1580 ) 1581 1582 def _parse_command(self) -> exp.Command: 1583 self._warn_unsupported() 1584 return self.expression( 1585 exp.Command, 1586 comments=self._prev_comments, 1587 this=self._prev.text.upper(), 1588 expression=self._parse_string(), 1589 ) 1590 1591 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1592 """ 1593 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1594 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1595 solve this by setting & resetting the parser state accordingly 1596 """ 1597 index = self._index 1598 error_level = self.error_level 1599 1600 self.error_level = ErrorLevel.IMMEDIATE 1601 try: 1602 this = parse_method() 1603 except ParseError: 1604 this = None 1605 finally: 1606 if not this or retreat: 1607 self._retreat(index) 1608 self.error_level = error_level 1609 1610 return this 1611 1612 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1613 start = self._prev 1614 exists = self._parse_exists() if allow_exists else None 1615 1616 self._match(TokenType.ON) 1617 1618 materialized = self._match_text_seq("MATERIALIZED") 1619 kind = self._match_set(self.CREATABLES) and self._prev 1620 if not kind: 1621 return self._parse_as_command(start) 1622 1623 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1624 this = self._parse_user_defined_function(kind=kind.token_type) 1625 elif kind.token_type == TokenType.TABLE: 1626 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1627 elif kind.token_type == TokenType.COLUMN: 1628 this = self._parse_column() 1629 else: 1630 this 
= self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a (possibly schema-qualified) table name into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause, including per-expression actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses one statement: a registered statement parser, a dialect command, or an expression."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to a Command if the target kind is unknown."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF EXISTS (or IF NOT EXISTS when not_=True); truthy only if fully matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement; falls back to a Command when it can't be fully understood."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates property lists parsed at different syntactic locations
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            # Trailing unconsumed tokens: bail out to an opaque Command
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses sequence options (INCREMENT, MINVALUE, ...); returns None if nothing matched."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif
self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # No tokens consumed means there were no sequence properties at all
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect optional modifier keywords that precede the property name
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parses a parenthesized, comma-separated property list."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property: a registered parser, a known keyword form, or `key = value`."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a `key = value` property; rewind and try sequence properties instead
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS ..., including Hive's INPUTFORMAT/OUTPUTFORMAT pair."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Unquoted identifiers are demoted to plain variables
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parses an optional `=`/`AS` followed by a value into the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses consecutive properties into a Properties node; returns None if none matched."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield one property or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguates VOLATILE: preceded by a creatable kind it's a table property,
        # otherwise it's a function stability marker
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        # Absent ON/OFF defaults to on
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_duplicate(self) -> exp.DuplicateKeyProperty:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False)
        return self.expression(exp.DuplicateKeyProperty, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Dispatches the many WITH-prefixed property forms to their specific parsers."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
self._match(TokenType.EQ) 2199 2200 user = self._parse_id_var() 2201 self._match(TokenType.PARAMETER) 2202 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2203 2204 if not user or not host: 2205 return None 2206 2207 return exp.DefinerProperty(this=f"{user}@{host}") 2208 2209 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2210 self._match(TokenType.TABLE) 2211 self._match(TokenType.EQ) 2212 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2213 2214 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2215 return self.expression(exp.LogProperty, no=no) 2216 2217 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2218 return self.expression(exp.JournalProperty, **kwargs) 2219 2220 def _parse_checksum(self) -> exp.ChecksumProperty: 2221 self._match(TokenType.EQ) 2222 2223 on = None 2224 if self._match(TokenType.ON): 2225 on = True 2226 elif self._match_text_seq("OFF"): 2227 on = False 2228 2229 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2230 2231 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2232 return self.expression( 2233 exp.Cluster, 2234 expressions=( 2235 self._parse_wrapped_csv(self._parse_ordered) 2236 if wrapped 2237 else self._parse_csv(self._parse_ordered) 2238 ), 2239 ) 2240 2241 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2242 self._match_text_seq("BY") 2243 2244 self._match_l_paren() 2245 expressions = self._parse_csv(self._parse_column) 2246 self._match_r_paren() 2247 2248 if self._match_text_seq("SORTED", "BY"): 2249 self._match_l_paren() 2250 sorted_by = self._parse_csv(self._parse_ordered) 2251 self._match_r_paren() 2252 else: 2253 sorted_by = None 2254 2255 self._match(TokenType.INTO) 2256 buckets = self._parse_number() 2257 self._match_text_seq("BUCKETS") 2258 2259 return self.expression( 2260 exp.ClusteredByProperty, 2261 expressions=expressions, 2262 
sorted_by=sorted_by, 2263 buckets=buckets, 2264 ) 2265 2266 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2267 if not self._match_text_seq("GRANTS"): 2268 self._retreat(self._index - 1) 2269 return None 2270 2271 return self.expression(exp.CopyGrantsProperty) 2272 2273 def _parse_freespace(self) -> exp.FreespaceProperty: 2274 self._match(TokenType.EQ) 2275 return self.expression( 2276 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2277 ) 2278 2279 def _parse_mergeblockratio( 2280 self, no: bool = False, default: bool = False 2281 ) -> exp.MergeBlockRatioProperty: 2282 if self._match(TokenType.EQ): 2283 return self.expression( 2284 exp.MergeBlockRatioProperty, 2285 this=self._parse_number(), 2286 percent=self._match(TokenType.PERCENT), 2287 ) 2288 2289 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2290 2291 def _parse_datablocksize( 2292 self, 2293 default: t.Optional[bool] = None, 2294 minimum: t.Optional[bool] = None, 2295 maximum: t.Optional[bool] = None, 2296 ) -> exp.DataBlocksizeProperty: 2297 self._match(TokenType.EQ) 2298 size = self._parse_number() 2299 2300 units = None 2301 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2302 units = self._prev.text 2303 2304 return self.expression( 2305 exp.DataBlocksizeProperty, 2306 size=size, 2307 units=units, 2308 default=default, 2309 minimum=minimum, 2310 maximum=maximum, 2311 ) 2312 2313 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2314 self._match(TokenType.EQ) 2315 always = self._match_text_seq("ALWAYS") 2316 manual = self._match_text_seq("MANUAL") 2317 never = self._match_text_seq("NEVER") 2318 default = self._match_text_seq("DEFAULT") 2319 2320 autotemp = None 2321 if self._match_text_seq("AUTOTEMP"): 2322 autotemp = self._parse_schema() 2323 2324 return self.expression( 2325 exp.BlockCompressionProperty, 2326 always=always, 2327 manual=manual, 2328 never=never, 2329 default=default, 
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse an optional ``[NO] [CONCURRENT] ISOLATED LOADING [<target>]`` property.

        Returns None (after rewinding any consumed NO/CONCURRENT tokens) if the
        mandatory ``ISOLATED LOADING`` keywords do not follow.
        """
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        # NO / CONCURRENT are only meaningful as prefixes of ISOLATED LOADING;
        # backtrack so those tokens can be re-parsed by another rule otherwise.
        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING property: an optional kind (TABLE/VIEW/ROW/DATABASE),
        an optional named target, FOR or IN, a lock type, and an optional OVERRIDE.

        Every component is optional; unmatched components are stored as None.
        """
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (database/table/view) take a name; ROW does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            # EXCL is normalized to its long form EXCLUSIVE.
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse ``PARTITION BY <expr>, ...``; returns [] if the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec: ``IN (...)``, ``FROM (...) TO (...)``,
        or ``WITH (MODULUS <n>, REMAINDER <n>)``. Raises if none matches."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE / MAXVALUE are kept as bare variables, not parsed as expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse ``OF <table> { DEFAULT | FOR VALUES <bound spec> }``.

        Returns None (rewinding one token) when OF does not follow; raises when
        neither DEFAULT nor FOR VALUES is present after the table name.
        """
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse a PARTITIONED BY property body (optionally preceded by ``=``)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the optional ``AND [NO] STATISTICS`` tail of a WITH [NO] DATA property."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse ``CONTAINS SQL`` (the CONTAINS keyword was already consumed)."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse ``MODIFIES SQL DATA`` (the MODIFIES keyword was already consumed)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse a NO-prefixed property: ``NO PRIMARY INDEX`` or ``NO SQL``."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse an ON-prefixed property: ``ON COMMIT PRESERVE|DELETE ROWS`` or
        a generic ``ON <schema>`` target."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse ``READS SQL DATA`` (the READS keyword was already consumed)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        # DISTKEY takes a single parenthesized identifier.
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse ``LIKE <table> [INCLUDING|EXCLUDING <option>]...``.

        Returns None if an INCLUDING/EXCLUDING keyword is not followed by an
        identifier (tokens consumed so far are NOT rewound in that case).
        """
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse a [COMPOUND] SORTKEY property with a parenthesized id list."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse a CHARACTER SET property value (optionally preceded by ``=``)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse ``REMOTE WITH CONNECTION <table parts>`` (REMOTE already consumed)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS property: ``TABLE<...>``, ``TABLE (<schema>)``,
        ``NULL ON NULL INPUT``, or a plain type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed struct-style column list: RETURNS TABLE<a INT, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse the body of a DESCRIBE statement (keyword already consumed)."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" word was actually the first part of a dotted table name;
            # rewind past it and the dot so the table parser sees the full name.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse a multi-table INSERT (INSERT FIRST/ALL ... SELECT ...).

        Collects a sequence of [WHEN <cond> THEN] [ELSE] INTO <table> branches,
        then the trailing source query.
        """
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # One branch; returns None when no INTO follows, which ends the loop.
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement body (INSERT keyword already consumed).

        Dispatches to _parse_multitable_inserts for INSERT FIRST/ALL forms.
        """
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT [LOCAL] DIRECTORY '<path>' [row format] form.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                # INSERT FIRST / INSERT ALL: multi-table insert syntax.
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                # INSERT OR REPLACE / ABORT / etc.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear either before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id> (KILL keyword already consumed)."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ``ON CONFLICT ...`` or ``ON DUPLICATE KEY ...``; None if neither."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # ON CONFLICT can target either a named constraint or a key列 list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE [SET] a = ..., b = ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse ``RETURNING <exprs> [INTO <target>]``; None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT clause (ROW keyword already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse ``[WITH] SERDEPROPERTIES (...)``; None (with rewind) if absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive-style row format: ``SERDE '<name>' [WITH SERDEPROPERTIES]``
        or ``DELIMITED [FIELDS|ESCAPED|COLLECTION ITEMS|MAP KEYS|LINES|NULL ...]``.

        When match_row is True, the leading ROW FORMAT tokens must be present.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional and contributes one keyword argument.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA ... INTO TABLE ...; any other LOAD form falls back to
        being captured as a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement body (DELETE keyword already consumed)."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE depending on dialect.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement body (UPDATE keyword already consumed)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse ``UNCACHE TABLE [IF EXISTS] <table>``; raises without TABLE."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse ``CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS] <select>``."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # A single 'key' = 'value' pair is supported inside OPTIONS(...).
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse ``PARTITION (<assignments>)``; None when PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse one VALUES row: either a parenthesized tuple or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            # Bare value: wrap it in a single-element tuple (one column, one row).
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a query: WITH-prefixed statement, SELECT, parenthesized subquery,
        VALUES, leading FROM, SUMMARIZE, DESCRIBE, or STREAM.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a bare table inside parentheses.
            parse_subquery_alias: parse an alias on a parenthesized subquery.
            parse_set_operation: attach trailing UNION/INTERSECT/EXCEPT here.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A following DOT means ALL/DISTINCT is actually a column qualifier,
            # not a quantifier, so skip matching it here.
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # SELECT AS STRUCT / SELECT AS VALUE (BigQuery-style kind markers).
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following the VALUES clause ref'd above.
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its CTE list; None when WITH is absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas; a stray WITH between them is tolerated.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: ``<alias> AS [NOT MATERIALIZED|MATERIALIZED] (<stmt>)``."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse ``[AS] <alias> [(<columns>)]``; None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery with trailing pivots/alias/sample; None if empty."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined tables that reference earlier sources as explicit
        UNNEST calls (for dialects with implicit-unnest semantics)."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and registered modifier clauses (WHERE, GROUP BY,
        LIMIT, ...) to a query or table expression."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an inline OFFSET; hoist it to its own node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # LIMIT ... BY expressions move onto the Offset node.
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment ``/*+ ... */``; raises without closing */."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse ``INTO [TEMPORARY|UNLOGGED] [TABLE] <table>``; None when absent."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; None when FROM is absent (unless skipped)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry with an optional FINAL/RUNNING frame keyword."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause; None when the keyword is absent."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured verbatim by scanning tokens until the
            # parentheses balance out, then slicing the original SQL text.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY; None when none matches."""
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        # cross_apply is True for CROSS APPLY, False for OUTER APPLY, None for LATERAL.
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to UNNEST, a function call, or a bare name,
            # possibly extended with dotted attribute/function access.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each optional."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of a USING clause, unwrapping bare columns."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join (comma join, [method] [side] [kind] JOIN, or APPLY).

        Returns None when the upcoming tokens do not form a join; any partially
        matched method/side/kind tokens are rewound in that case.
        """
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        # STRAIGHT_JOIN acts as the JOIN keyword itself.
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type == TokenType.CROSS)
        ):
            # Handle nested joins like "a JOIN b JOIN c ON ..." where the ON/USING
            # belongs to the outer join; rewind if no condition ultimately follows.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse index parameters: USING, column list, INCLUDE, PARTITION BY,
        WITH storage options, TABLESPACE, WHERE, and ON."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()
        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        When `index` is given or `anonymous` is set, the index name was already
        consumed (or there is none) and only the target table plus parameters follow.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL `WITH (...)` table hints or MySQL index hints after a table."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and
self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single component of a (possibly dotted) table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a dotted table reference into catalog / db / table parts."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes =
self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any FROM-clause table factor: lateral, unnest, VALUES, subquery or plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if
schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place TABLESAMPLE before the alias, others after it
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal (time travel) table clause, e.g. FOR SYSTEM_TIME AS OF ..."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Not a complete AT/BEFORE clause - rewind to where we started
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse Snowflake's CHANGES (INFORMATION => ...) clause."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table factor, including alias and offset handling."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            # With ORDINALITY, the extra column alias names the offset column
            if offset and len(expressions) < len(columns):
                offset =
columns.pop() 3781 3782 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3783 self._match(TokenType.ALIAS) 3784 offset = self._parse_id_var( 3785 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3786 ) or exp.to_identifier("offset") 3787 3788 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3789 3790 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3791 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3792 if not is_derived and not ( 3793 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3794 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3795 ): 3796 return None 3797 3798 expressions = self._parse_csv(self._parse_value) 3799 alias = self._parse_table_alias() 3800 3801 if is_derived: 3802 self._match_r_paren() 3803 3804 return self.expression( 3805 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3806 ) 3807 3808 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3809 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3810 as_modifier and self._match_text_seq("USING", "SAMPLE") 3811 ): 3812 return None 3813 3814 bucket_numerator = None 3815 bucket_denominator = None 3816 bucket_field = None 3817 percent = None 3818 size = None 3819 seed = None 3820 3821 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3822 matched_l_paren = self._match(TokenType.L_PAREN) 3823 3824 if self.TABLESAMPLE_CSV: 3825 num = None 3826 expressions = self._parse_csv(self._parse_primary) 3827 else: 3828 expressions = None 3829 num = ( 3830 self._parse_factor() 3831 if self._match(TokenType.NUMBER, advance=False) 3832 else self._parse_primary() or self._parse_placeholder() 3833 ) 3834 3835 if self._match_text_seq("BUCKET"): 3836 bucket_numerator = self._parse_number() 3837 self._match_text_seq("OUT", "OF") 3838 bucket_denominator = bucket_denominator = self._parse_number() 3839 
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        """Lazily yield JOIN clauses until one fails to parse."""
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        def _parse_aliased_expression() ->
t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse one PIVOT/UNPIVOT clause; returns None (after retreating) if not one."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot =
self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the output column names a PIVOT produces
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Column names contributed by a PIVOT's aggregations (their aliases)."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse ClickHouse's PREWHERE clause."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL/DISTINCT, CUBE, ROLLUP, GROUPING SETS and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif
self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # A lone WITH (or nothing) after the expressions means we are done
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        """Build a CUBE/ROLLUP node; WITH-prefixed forms carry no expression list."""
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one member of a GROUPING SETS list: a column or a tuple of columns."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having,
this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle's hierarchical START WITH ... CONNECT BY clause (either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only an operator inside CONNECT BY, so register its parser temporarily
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `name AS expr`, e.g. inside an INTERPOLATE list."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's INTERPOLATE (...) modifier of ORDER BY ... WITH FILL."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER [SIBLINGS] BY clause onto `this`."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY / DISTRIBUTE BY)."""
        if
not self._match(token): 4138 return None 4139 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4140 4141 def _parse_ordered( 4142 self, parse_method: t.Optional[t.Callable] = None 4143 ) -> t.Optional[exp.Ordered]: 4144 this = parse_method() if parse_method else self._parse_assignment() 4145 if not this: 4146 return None 4147 4148 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4149 this = exp.var("ALL") 4150 4151 asc = self._match(TokenType.ASC) 4152 desc = self._match(TokenType.DESC) or (asc and False) 4153 4154 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4155 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4156 4157 nulls_first = is_nulls_first or False 4158 explicitly_null_ordered = is_nulls_first or is_nulls_last 4159 4160 if ( 4161 not explicitly_null_ordered 4162 and ( 4163 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4164 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4165 ) 4166 and self.dialect.NULL_ORDERING != "nulls_are_last" 4167 ): 4168 nulls_first = True 4169 4170 if self._match_text_seq("WITH", "FILL"): 4171 with_fill = self.expression( 4172 exp.WithFill, 4173 **{ # type: ignore 4174 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4175 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4176 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4177 "interpolate": self._parse_interpolate(), 4178 }, 4179 ) 4180 else: 4181 with_fill = None 4182 4183 return self.expression( 4184 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4185 ) 4186 4187 def _parse_limit( 4188 self, 4189 this: t.Optional[exp.Expression] = None, 4190 top: bool = False, 4191 skip_limit_token: bool = False, 4192 ) -> t.Optional[exp.Expression]: 4193 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4194 comments = self._prev_comments 4195 if top: 4196 limit_paren = 
self._match(TokenType.L_PAREN)
                # T-SQL allows TOP (expr); bare TOP only takes a number
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            # MySQL-style `LIMIT offset, count`
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's `LIMIT ... BY expr, ...` expressions."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif
self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait semantics: True = NOWAIT, False = SKIP LOCKED, expr = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION / EXCEPT / INTERSECT operators onto `this`."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        # Some dialects attach ORDER BY/LIMIT etc. to the whole set operation
        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) expression."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse assignment-level operators (e.g. `:=`), right-associatively."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        """Parse OR-level binary operators."""
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND-level binary operators."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level binary operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level binary operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ISNULL/NOTNULL, IS ...)."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
4385 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4386 if self._match(TokenType.NOTNULL): 4387 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4388 this = self.expression(exp.Not, this=this) 4389 4390 if negate: 4391 this = self._negate_range(this) 4392 4393 if self._match(TokenType.IS): 4394 this = self._parse_is(this) 4395 4396 return this 4397 4398 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4399 if not this: 4400 return this 4401 4402 return self.expression(exp.Not, this=this) 4403 4404 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4405 index = self._index - 1 4406 negate = self._match(TokenType.NOT) 4407 4408 if self._match_text_seq("DISTINCT", "FROM"): 4409 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4410 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4411 4412 if self._match(TokenType.JSON): 4413 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4414 4415 if self._match_text_seq("WITH"): 4416 _with = True 4417 elif self._match_text_seq("WITHOUT"): 4418 _with = False 4419 else: 4420 _with = None 4421 4422 unique = self._match(TokenType.UNIQUE) 4423 self._match_text_seq("KEYS") 4424 expression: t.Optional[exp.Expression] = self.expression( 4425 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4426 ) 4427 else: 4428 expression = self._parse_primary() or self._parse_null() 4429 if not expression: 4430 self._retreat(index) 4431 return None 4432 4433 this = self.expression(exp.Is, this=this, expression=expression) 4434 return self.expression(exp.Not, this=this) if negate else this 4435 4436 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4437 unnest = self._parse_unnest(with_alias=False) 4438 if unnest: 4439 this = self.expression(exp.In, this=this, unnest=unnest) 4440 elif self._match_set((TokenType.L_PAREN, 
TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `BETWEEN low AND high` (the BETWEEN token was already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix for LIKE-style predicates."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, normalizing toward `INTERVAL '<value>' <unit>`."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare `INTERVAL` followed by IS etc. was not an interval literal - rewind
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
4495 # each INTERVAL expression into this canonical form so it's easy to transpile 4496 if this and this.is_number: 4497 this = exp.Literal.string(this.to_py()) 4498 elif this and this.is_string: 4499 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4500 if len(parts) == 1: 4501 if unit: 4502 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4503 self._retreat(self._index - 1) 4504 4505 this = exp.Literal.string(parts[0][0]) 4506 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4507 4508 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4509 unit = self.expression( 4510 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4511 ) 4512 4513 interval = self.expression(exp.Interval, this=this, unit=unit) 4514 4515 index = self._index 4516 self._match(TokenType.PLUS) 4517 4518 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4519 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4520 return self.expression( 4521 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4522 ) 4523 4524 self._retreat(index) 4525 return interval 4526 4527 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4528 this = self._parse_term() 4529 4530 while True: 4531 if self._match_set(self.BITWISE): 4532 this = self.expression( 4533 self.BITWISE[self._prev.token_type], 4534 this=this, 4535 expression=self._parse_term(), 4536 ) 4537 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4538 this = self.expression( 4539 exp.DPipe, 4540 this=this, 4541 expression=self._parse_term(), 4542 safe=not self.dialect.STRICT_STRING_CONCAT, 4543 ) 4544 elif self._match(TokenType.DQMARK): 4545 this = self.expression( 4546 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4547 ) 4548 elif self._match_pair(TokenType.LT, TokenType.LT): 4549 this = self.expression( 4550 exp.BitwiseLeftShift, this=this, 
expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-precedence (TERM) operators, with special handling of COLLATE operands."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Collate):
            expr = this.expression

            # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
            # fallback to Identifier / Var
            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                ident = expr.this
                if isinstance(ident, exp.Identifier):
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-precedence (FACTOR) operators; Div nodes are annotated with the
        dialect's typed/safe division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                # A word operator (e.g. DIV) with no right operand: treat it as an identifier
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation operators via the generic token-driven helper."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator if one is present, else a typed expression with AT TIME ZONE."""
        if self._match_set(self.UNARY_PARSERS):
return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a `<type> <literal>` cast-style construct, or fall back to a
        column (or identifier when `fallback_to_identifier` is set)."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
4650 # 4651 # In these cases, we don't really want to return the converted type, but instead retreat 4652 # and try to parse a Column or Identifier in the section below. 4653 if data_type.expressions and index2 - index > 1: 4654 self._retreat(index2) 4655 return self._parse_column_ops(data_type) 4656 4657 self._retreat(index) 4658 4659 if fallback_to_identifier: 4660 return self._parse_id_var() 4661 4662 this = self._parse_column() 4663 return this and self._parse_column_ops(this) 4664 4665 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4666 this = self._parse_type() 4667 if not this: 4668 return None 4669 4670 if isinstance(this, exp.Column) and not this.table: 4671 this = exp.var(this.name.upper()) 4672 4673 return self.expression( 4674 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4675 ) 4676 4677 def _parse_types( 4678 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4679 ) -> t.Optional[exp.Expression]: 4680 index = self._index 4681 4682 this: t.Optional[exp.Expression] = None 4683 prefix = self._match_text_seq("SYSUDTLIB", ".") 4684 4685 if not self._match_set(self.TYPE_TOKENS): 4686 identifier = allow_identifiers and self._parse_id_var( 4687 any_token=False, tokens=(TokenType.VAR,) 4688 ) 4689 if isinstance(identifier, exp.Identifier): 4690 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4691 4692 if len(tokens) != 1: 4693 self.raise_error("Unexpected identifier", self._prev) 4694 4695 if tokens[0].token_type in self.TYPE_TOKENS: 4696 self._prev = tokens[0] 4697 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4698 type_name = identifier.name 4699 4700 while self._match(TokenType.DOT): 4701 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4702 4703 this = exp.DataType.build(type_name, udt=True) 4704 else: 4705 self._retreat(self._index - 1) 4706 return None 4707 else: 4708 return None 4709 4710 type_token = self._prev.token_type 4711 4712 if 
type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized type arguments, e.g. DECIMAL(38, 0), ARRAY(INT), ENUM('a', 'b')
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracketed nested type arguments, e.g. ARRAY<INT>, STRUCT<a INT>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional inline constructor values, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif
type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal, so this was a function call, not a type
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single STRUCT member, e.g. `name type` or `name: type`."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token.
# Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # A type was required but we did not parse one -- rewind and retry as a bare type
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.AtTimeZone if an AT TIME ZONE clause follows; otherwise pass through."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with any trailing column operators."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) outer join marker
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a bare field reference, wrapping Identifier results into exp.Column."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by ( is being used as an identifier here
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path` VARIANT extraction into exp.JSONExtract."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as
# json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of a `::` cast, i.e. a data type."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (e.g. ::, ., brackets) to `this` repeatedly."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op =
self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers: a.b.c becomes Column(this=c, table=b, db=a)
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a leading-dot number, or a parenthesized
        expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Consecutive string literals are folded into a single Concat
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # .5 style literal
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments =
self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                # Empty parens: an empty tuple
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier/variable.

        `anonymous_func` flips the primary/function precedence so function parsing is tried first.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including the ODBC-style {fn <function>} escape wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation, dispatching to registered per-function parsers,
        subquery predicates, known builders, or falling back to exp.Anonymous."""
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # Functions that are parsed without requiring parentheses, e.g. CURRENT_DATE
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda:
self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Some builders accept the dialect; inspect the signature to decide
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling of the function name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Return `expression` unchanged; subclasses can override to transform positional args."""
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, EQ-like nodes) into exp.PropertyEQ."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap the column so the key is a bare identifier
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single function/UDF parameter as a column definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name and its optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not
self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse the literal following an introducer token into exp.Introducer; if no literal
        follows, the token itself becomes an Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `<kind>.<this>`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda argument; a plain identifier by default."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (args -> body), a DISTINCT argument list, or a plain expression,
        followed by any aggregate-argument modifiers (ORDER BY, LIMIT, etc.)."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda -- rewind and parse as a regular expression instead
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this:
t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into an exp.Schema wrapping `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a field definition: a field optionally followed by a type and constraints."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and its column constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # Computed columns, e.g. col AS expr / col ALIAS expr / col MATERIALIZED expr
        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_field()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not
constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT options; upgrades to a generated-identity constraint when both
        a start and an increment value are present."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewinds the consumed token when REFRESH doesn't follow."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped value list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY(...) | ROW ... | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this =
self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5459 5460 self._match(TokenType.ALIAS) 5461 5462 if self._match_text_seq("ROW"): 5463 start = self._match_text_seq("START") 5464 if not start: 5465 self._match(TokenType.END) 5466 hidden = self._match_text_seq("HIDDEN") 5467 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5468 5469 identity = self._match_text_seq("IDENTITY") 5470 5471 if self._match(TokenType.L_PAREN): 5472 if self._match(TokenType.START_WITH): 5473 this.set("start", self._parse_bitwise()) 5474 if self._match_text_seq("INCREMENT", "BY"): 5475 this.set("increment", self._parse_bitwise()) 5476 if self._match_text_seq("MINVALUE"): 5477 this.set("minvalue", self._parse_bitwise()) 5478 if self._match_text_seq("MAXVALUE"): 5479 this.set("maxvalue", self._parse_bitwise()) 5480 5481 if self._match_text_seq("CYCLE"): 5482 this.set("cycle", True) 5483 elif self._match_text_seq("NO", "CYCLE"): 5484 this.set("cycle", False) 5485 5486 if not identity: 5487 this.set("expression", self._parse_range()) 5488 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5489 args = self._parse_csv(self._parse_bitwise) 5490 this.set("start", seq_get(args, 0)) 5491 this.set("increment", seq_get(args, 1)) 5492 5493 self._match_r_paren() 5494 5495 return this 5496 5497 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5498 self._match_text_seq("LENGTH") 5499 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5500 5501 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5502 if self._match_text_seq("NULL"): 5503 return self.expression(exp.NotNullColumnConstraint) 5504 if self._match_text_seq("CASESPECIFIC"): 5505 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5506 if self._match_text_seq("FOR", "REPLICATION"): 5507 return self.expression(exp.NotForReplicationColumnConstraint) 5508 5509 # Unconsume the `NOT` token 5510 
self._retreat(self._index - 1) 5511 return None 5512 5513 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5514 if self._match(TokenType.CONSTRAINT): 5515 this = self._parse_id_var() 5516 else: 5517 this = None 5518 5519 if self._match_texts(self.CONSTRAINT_PARSERS): 5520 return self.expression( 5521 exp.ColumnConstraint, 5522 this=this, 5523 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5524 ) 5525 5526 return this 5527 5528 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5529 if not self._match(TokenType.CONSTRAINT): 5530 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5531 5532 return self.expression( 5533 exp.Constraint, 5534 this=self._parse_id_var(), 5535 expressions=self._parse_unnamed_constraints(), 5536 ) 5537 5538 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5539 constraints = [] 5540 while True: 5541 constraint = self._parse_unnamed_constraint() or self._parse_function() 5542 if not constraint: 5543 break 5544 constraints.append(constraint) 5545 5546 return constraints 5547 5548 def _parse_unnamed_constraint( 5549 self, constraints: t.Optional[t.Collection[str]] = None 5550 ) -> t.Optional[exp.Expression]: 5551 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5552 constraints or self.CONSTRAINT_PARSERS 5553 ): 5554 return None 5555 5556 constraint = self._prev.text.upper() 5557 if constraint not in self.CONSTRAINT_PARSERS: 5558 self.raise_error(f"No parser found for schema constraint {constraint}.") 5559 5560 return self.CONSTRAINT_PARSERS[constraint](self) 5561 5562 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5563 return self._parse_id_var(any_token=False) 5564 5565 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5566 self._match_text_seq("KEY") 5567 return self.expression( 5568 exp.UniqueColumnConstraint, 5569 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5570 
this=self._parse_schema(self._parse_unique_key()), 5571 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5572 on_conflict=self._parse_on_conflict(), 5573 ) 5574 5575 def _parse_key_constraint_options(self) -> t.List[str]: 5576 options = [] 5577 while True: 5578 if not self._curr: 5579 break 5580 5581 if self._match(TokenType.ON): 5582 action = None 5583 on = self._advance_any() and self._prev.text 5584 5585 if self._match_text_seq("NO", "ACTION"): 5586 action = "NO ACTION" 5587 elif self._match_text_seq("CASCADE"): 5588 action = "CASCADE" 5589 elif self._match_text_seq("RESTRICT"): 5590 action = "RESTRICT" 5591 elif self._match_pair(TokenType.SET, TokenType.NULL): 5592 action = "SET NULL" 5593 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5594 action = "SET DEFAULT" 5595 else: 5596 self.raise_error("Invalid key constraint") 5597 5598 options.append(f"ON {on} {action}") 5599 else: 5600 var = self._parse_var_from_options( 5601 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5602 ) 5603 if not var: 5604 break 5605 options.append(var.name) 5606 5607 return options 5608 5609 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5610 if match and not self._match(TokenType.REFERENCES): 5611 return None 5612 5613 expressions = None 5614 this = self._parse_table(schema=True) 5615 options = self._parse_key_constraint_options() 5616 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5617 5618 def _parse_foreign_key(self) -> exp.ForeignKey: 5619 expressions = self._parse_wrapped_id_vars() 5620 reference = self._parse_references() 5621 options = {} 5622 5623 while self._match(TokenType.ON): 5624 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5625 self.raise_error("Expected DELETE or UPDATE") 5626 5627 kind = self._prev.text.lower() 5628 5629 if self._match_text_seq("NO", "ACTION"): 5630 action = "NO ACTION" 5631 elif self._match(TokenType.SET): 5632 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5633 action = "SET " + self._prev.text.upper() 5634 else: 5635 self._advance() 5636 action = self._prev.text.upper() 5637 5638 options[kind] = action 5639 5640 return self.expression( 5641 exp.ForeignKey, 5642 expressions=expressions, 5643 reference=reference, 5644 **options, # type: ignore 5645 ) 5646 5647 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5648 return self._parse_field() 5649 5650 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5651 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5652 self._retreat(self._index - 1) 5653 return None 5654 5655 id_vars = self._parse_wrapped_id_vars() 5656 return self.expression( 5657 exp.PeriodForSystemTimeConstraint, 5658 this=seq_get(id_vars, 0), 5659 expression=seq_get(id_vars, 1), 5660 ) 5661 5662 def _parse_primary_key( 5663 self, wrapped_optional: bool = False, in_props: bool = False 5664 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5665 desc = ( 5666 self._match_set((TokenType.ASC, TokenType.DESC)) 5667 and self._prev.token_type == TokenType.DESC 5668 ) 5669 5670 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5671 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5672 5673 expressions = self._parse_wrapped_csv( 5674 self._parse_primary_key_part, optional=wrapped_optional 5675 ) 5676 options = self._parse_key_constraint_options() 5677 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5678 5679 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5680 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5681 5682 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5683 """ 5684 Parses a datetime column in ODBC format. 
We parse the column into the corresponding 5685 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5686 same as we did for `DATE('yyyy-mm-dd')`. 5687 5688 Reference: 5689 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5690 """ 5691 self._match(TokenType.VAR) 5692 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5693 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5694 if not self._match(TokenType.R_BRACE): 5695 self.raise_error("Expected }") 5696 return expression 5697 5698 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5699 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5700 return this 5701 5702 bracket_kind = self._prev.token_type 5703 if ( 5704 bracket_kind == TokenType.L_BRACE 5705 and self._curr 5706 and self._curr.token_type == TokenType.VAR 5707 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5708 ): 5709 return self._parse_odbc_datetime_literal() 5710 5711 expressions = self._parse_csv( 5712 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5713 ) 5714 5715 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5716 self.raise_error("Expected ]") 5717 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5718 self.raise_error("Expected }") 5719 5720 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5721 if bracket_kind == TokenType.L_BRACE: 5722 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5723 elif not this: 5724 this = build_array_constructor( 5725 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5726 ) 5727 else: 5728 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5729 if constructor_type: 5730 return build_array_constructor( 5731 constructor_type, 5732 
args=expressions, 5733 bracket_kind=bracket_kind, 5734 dialect=self.dialect, 5735 ) 5736 5737 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5738 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5739 5740 self._add_comments(this) 5741 return self._parse_bracket(this) 5742 5743 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5744 if self._match(TokenType.COLON): 5745 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5746 return this 5747 5748 def _parse_case(self) -> t.Optional[exp.Expression]: 5749 ifs = [] 5750 default = None 5751 5752 comments = self._prev_comments 5753 expression = self._parse_assignment() 5754 5755 while self._match(TokenType.WHEN): 5756 this = self._parse_assignment() 5757 self._match(TokenType.THEN) 5758 then = self._parse_assignment() 5759 ifs.append(self.expression(exp.If, this=this, true=then)) 5760 5761 if self._match(TokenType.ELSE): 5762 default = self._parse_assignment() 5763 5764 if not self._match(TokenType.END): 5765 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5766 default = exp.column("interval") 5767 else: 5768 self.raise_error("Expected END after CASE", self._prev) 5769 5770 return self.expression( 5771 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5772 ) 5773 5774 def _parse_if(self) -> t.Optional[exp.Expression]: 5775 if self._match(TokenType.L_PAREN): 5776 args = self._parse_csv(self._parse_assignment) 5777 this = self.validate_expression(exp.If.from_arg_list(args), args) 5778 self._match_r_paren() 5779 else: 5780 index = self._index - 1 5781 5782 if self.NO_PAREN_IF_COMMANDS and index == 0: 5783 return self._parse_as_command(self._prev) 5784 5785 condition = self._parse_assignment() 5786 5787 if not condition: 5788 self._retreat(index) 5789 return None 5790 5791 self._match(TokenType.THEN) 5792 true = self._parse_assignment() 5793 false = 
self._parse_assignment() if self._match(TokenType.ELSE) else None 5794 self._match(TokenType.END) 5795 this = self.expression(exp.If, this=condition, true=true, false=false) 5796 5797 return this 5798 5799 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5800 if not self._match_text_seq("VALUE", "FOR"): 5801 self._retreat(self._index - 1) 5802 return None 5803 5804 return self.expression( 5805 exp.NextValueFor, 5806 this=self._parse_column(), 5807 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5808 ) 5809 5810 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5811 this = self._parse_function() or self._parse_var_or_string(upper=True) 5812 5813 if self._match(TokenType.FROM): 5814 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5815 5816 if not self._match(TokenType.COMMA): 5817 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5818 5819 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5820 5821 def _parse_gap_fill(self) -> exp.GapFill: 5822 self._match(TokenType.TABLE) 5823 this = self._parse_table() 5824 5825 self._match(TokenType.COMMA) 5826 args = [this, *self._parse_csv(self._parse_lambda)] 5827 5828 gap_fill = exp.GapFill.from_arg_list(args) 5829 return self.validate_expression(gap_fill, args) 5830 5831 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5832 this = self._parse_assignment() 5833 5834 if not self._match(TokenType.ALIAS): 5835 if self._match(TokenType.COMMA): 5836 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5837 5838 self.raise_error("Expected AS after CAST") 5839 5840 fmt = None 5841 to = self._parse_types() 5842 5843 if self._match(TokenType.FORMAT): 5844 fmt_string = self._parse_string() 5845 fmt = self._parse_at_time_zone(fmt_string) 5846 5847 if not to: 5848 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5849 if to.this in 
exp.DataType.TEMPORAL_TYPES: 5850 this = self.expression( 5851 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5852 this=this, 5853 format=exp.Literal.string( 5854 format_time( 5855 fmt_string.this if fmt_string else "", 5856 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5857 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5858 ) 5859 ), 5860 safe=safe, 5861 ) 5862 5863 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5864 this.set("zone", fmt.args["zone"]) 5865 return this 5866 elif not to: 5867 self.raise_error("Expected TYPE after CAST") 5868 elif isinstance(to, exp.Identifier): 5869 to = exp.DataType.build(to.name, udt=True) 5870 elif to.this == exp.DataType.Type.CHAR: 5871 if self._match(TokenType.CHARACTER_SET): 5872 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5873 5874 return self.expression( 5875 exp.Cast if strict else exp.TryCast, 5876 this=this, 5877 to=to, 5878 format=fmt, 5879 safe=safe, 5880 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5881 ) 5882 5883 def _parse_string_agg(self) -> exp.Expression: 5884 if self._match(TokenType.DISTINCT): 5885 args: t.List[t.Optional[exp.Expression]] = [ 5886 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5887 ] 5888 if self._match(TokenType.COMMA): 5889 args.extend(self._parse_csv(self._parse_assignment)) 5890 else: 5891 args = self._parse_csv(self._parse_assignment) # type: ignore 5892 5893 index = self._index 5894 if not self._match(TokenType.R_PAREN) and args: 5895 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5896 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5897 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5898 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5899 5900 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5901 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5902 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5903 if not self._match_text_seq("WITHIN", "GROUP"): 5904 self._retreat(index) 5905 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5906 5907 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5908 order = self._parse_order(this=seq_get(args, 0)) 5909 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5910 5911 def _parse_convert( 5912 self, strict: bool, safe: t.Optional[bool] = None 5913 ) -> t.Optional[exp.Expression]: 5914 this = self._parse_bitwise() 5915 5916 if self._match(TokenType.USING): 5917 to: t.Optional[exp.Expression] = self.expression( 5918 exp.CharacterSet, this=self._parse_var() 5919 ) 5920 elif self._match(TokenType.COMMA): 5921 to = self._parse_types() 5922 else: 5923 to = None 5924 5925 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5926 5927 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5928 """ 5929 There are generally two variants of the DECODE function: 5930 5931 - DECODE(bin, charset) 5932 - DECODE(expression, search, result [, search, result] ... [, default]) 5933 5934 The second variant will always be parsed into a CASE expression. Note that NULL 5935 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5936 instead of relying on pattern matching. 
5937 """ 5938 args = self._parse_csv(self._parse_assignment) 5939 5940 if len(args) < 3: 5941 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5942 5943 expression, *expressions = args 5944 if not expression: 5945 return None 5946 5947 ifs = [] 5948 for search, result in zip(expressions[::2], expressions[1::2]): 5949 if not search or not result: 5950 return None 5951 5952 if isinstance(search, exp.Literal): 5953 ifs.append( 5954 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5955 ) 5956 elif isinstance(search, exp.Null): 5957 ifs.append( 5958 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5959 ) 5960 else: 5961 cond = exp.or_( 5962 exp.EQ(this=expression.copy(), expression=search), 5963 exp.and_( 5964 exp.Is(this=expression.copy(), expression=exp.Null()), 5965 exp.Is(this=search.copy(), expression=exp.Null()), 5966 copy=False, 5967 ), 5968 copy=False, 5969 ) 5970 ifs.append(exp.If(this=cond, true=result)) 5971 5972 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5973 5974 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5975 self._match_text_seq("KEY") 5976 key = self._parse_column() 5977 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5978 self._match_text_seq("VALUE") 5979 value = self._parse_bitwise() 5980 5981 if not key and not value: 5982 return None 5983 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5984 5985 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5986 if not this or not self._match_text_seq("FORMAT", "JSON"): 5987 return this 5988 5989 return self.expression(exp.FormatJson, this=this) 5990 5991 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 5992 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 5993 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 5994 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5995 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5996 else: 5997 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5998 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5999 6000 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6001 6002 if not empty and not error and not null: 6003 return None 6004 6005 return self.expression( 6006 exp.OnCondition, 6007 empty=empty, 6008 error=error, 6009 null=null, 6010 ) 6011 6012 def _parse_on_handling( 6013 self, on: str, *values: str 6014 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6015 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6016 for value in values: 6017 if self._match_text_seq(value, "ON", on): 6018 return f"{value} ON {on}" 6019 6020 index = self._index 6021 if self._match(TokenType.DEFAULT): 6022 default_value = self._parse_bitwise() 6023 if self._match_text_seq("ON", on): 6024 return default_value 6025 6026 self._retreat(index) 6027 6028 return None 6029 6030 @t.overload 6031 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6032 6033 @t.overload 6034 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6035 6036 def _parse_json_object(self, agg=False): 6037 star = self._parse_star() 6038 expressions = ( 6039 [star] 6040 if star 6041 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6042 ) 6043 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6044 6045 unique_keys = None 6046 if self._match_text_seq("WITH", "UNIQUE"): 6047 unique_keys = True 6048 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6049 unique_keys = False 6050 6051 self._match_text_seq("KEYS") 6052 6053 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6054 self._parse_type() 6055 ) 6056 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6057 6058 return self.expression( 6059 exp.JSONObjectAgg if agg else exp.JSONObject, 6060 expressions=expressions, 6061 null_handling=null_handling, 6062 unique_keys=unique_keys, 6063 return_type=return_type, 6064 encoding=encoding, 6065 ) 6066 6067 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6068 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6069 if not self._match_text_seq("NESTED"): 6070 this = self._parse_id_var() 6071 kind = self._parse_types(allow_identifiers=False) 6072 nested = None 6073 else: 6074 this = None 6075 kind = None 6076 nested = True 6077 6078 path = self._match_text_seq("PATH") and self._parse_string() 6079 nested_schema = nested and self._parse_json_schema() 6080 6081 return self.expression( 6082 exp.JSONColumnDef, 6083 this=this, 6084 kind=kind, 6085 path=path, 6086 nested_schema=nested_schema, 6087 ) 6088 6089 def _parse_json_schema(self) -> exp.JSONSchema: 6090 self._match_text_seq("COLUMNS") 6091 return self.expression( 6092 exp.JSONSchema, 6093 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6094 ) 6095 6096 def _parse_json_table(self) -> exp.JSONTable: 6097 this = self._parse_format_json(self._parse_bitwise()) 6098 path = self._match(TokenType.COMMA) and 
self._parse_string() 6099 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6100 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6101 schema = self._parse_json_schema() 6102 6103 return exp.JSONTable( 6104 this=this, 6105 schema=schema, 6106 path=path, 6107 error_handling=error_handling, 6108 empty_handling=empty_handling, 6109 ) 6110 6111 def _parse_match_against(self) -> exp.MatchAgainst: 6112 expressions = self._parse_csv(self._parse_column) 6113 6114 self._match_text_seq(")", "AGAINST", "(") 6115 6116 this = self._parse_string() 6117 6118 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6119 modifier = "IN NATURAL LANGUAGE MODE" 6120 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6121 modifier = f"{modifier} WITH QUERY EXPANSION" 6122 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6123 modifier = "IN BOOLEAN MODE" 6124 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6125 modifier = "WITH QUERY EXPANSION" 6126 else: 6127 modifier = None 6128 6129 return self.expression( 6130 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6131 ) 6132 6133 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6134 def _parse_open_json(self) -> exp.OpenJSON: 6135 this = self._parse_bitwise() 6136 path = self._match(TokenType.COMMA) and self._parse_string() 6137 6138 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6139 this = self._parse_field(any_token=True) 6140 kind = self._parse_types() 6141 path = self._parse_string() 6142 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6143 6144 return self.expression( 6145 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6146 ) 6147 6148 expressions = None 6149 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6150 self._match_l_paren() 6151 expressions = self._parse_csv(_parse_open_json_column_def) 6152 6153 return self.expression(exp.OpenJSON, 
this=this, path=path, expressions=expressions) 6154 6155 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6156 args = self._parse_csv(self._parse_bitwise) 6157 6158 if self._match(TokenType.IN): 6159 return self.expression( 6160 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6161 ) 6162 6163 if haystack_first: 6164 haystack = seq_get(args, 0) 6165 needle = seq_get(args, 1) 6166 else: 6167 needle = seq_get(args, 0) 6168 haystack = seq_get(args, 1) 6169 6170 return self.expression( 6171 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6172 ) 6173 6174 def _parse_predict(self) -> exp.Predict: 6175 self._match_text_seq("MODEL") 6176 this = self._parse_table() 6177 6178 self._match(TokenType.COMMA) 6179 self._match_text_seq("TABLE") 6180 6181 return self.expression( 6182 exp.Predict, 6183 this=this, 6184 expression=self._parse_table(), 6185 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6186 ) 6187 6188 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6189 args = self._parse_csv(self._parse_table) 6190 return exp.JoinHint(this=func_name.upper(), expressions=args) 6191 6192 def _parse_substring(self) -> exp.Substring: 6193 # Postgres supports the form: substring(string [from int] [for int]) 6194 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6195 6196 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6197 6198 if self._match(TokenType.FROM): 6199 args.append(self._parse_bitwise()) 6200 if self._match(TokenType.FOR): 6201 if len(args) == 1: 6202 args.append(exp.Literal.number(1)) 6203 args.append(self._parse_bitwise()) 6204 6205 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6206 6207 def _parse_trim(self) -> exp.Trim: 6208 # https://www.w3resource.com/sql/character-functions/trim.php 6209 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6210 6211 position = None 
6212 collation = None 6213 expression = None 6214 6215 if self._match_texts(self.TRIM_TYPES): 6216 position = self._prev.text.upper() 6217 6218 this = self._parse_bitwise() 6219 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6220 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6221 expression = self._parse_bitwise() 6222 6223 if invert_order: 6224 this, expression = expression, this 6225 6226 if self._match(TokenType.COLLATE): 6227 collation = self._parse_bitwise() 6228 6229 return self.expression( 6230 exp.Trim, this=this, position=position, expression=expression, collation=collation 6231 ) 6232 6233 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6234 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6235 6236 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6237 return self._parse_window(self._parse_id_var(), alias=True) 6238 6239 def _parse_respect_or_ignore_nulls( 6240 self, this: t.Optional[exp.Expression] 6241 ) -> t.Optional[exp.Expression]: 6242 if self._match_text_seq("IGNORE", "NULLS"): 6243 return self.expression(exp.IgnoreNulls, this=this) 6244 if self._match_text_seq("RESPECT", "NULLS"): 6245 return self.expression(exp.RespectNulls, this=this) 6246 return this 6247 6248 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6249 if self._match(TokenType.HAVING): 6250 self._match_texts(("MAX", "MIN")) 6251 max = self._prev.text.upper() != "MIN" 6252 return self.expression( 6253 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6254 ) 6255 6256 return this 6257 6258 def _parse_window( 6259 self, this: t.Optional[exp.Expression], alias: bool = False 6260 ) -> t.Optional[exp.Expression]: 6261 func = this 6262 comments = func.comments if isinstance(func, exp.Expression) else None 6263 6264 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6265 self._match(TokenType.WHERE) 6266 this = 
self.expression( 6267 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6268 ) 6269 self._match_r_paren() 6270 6271 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6272 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6273 if self._match_text_seq("WITHIN", "GROUP"): 6274 order = self._parse_wrapped(self._parse_order) 6275 this = self.expression(exp.WithinGroup, this=this, expression=order) 6276 6277 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6278 # Some dialects choose to implement and some do not. 6279 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6280 6281 # There is some code above in _parse_lambda that handles 6282 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6283 6284 # The below changes handle 6285 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6286 6287 # Oracle allows both formats 6288 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6289 # and Snowflake chose to do the same for familiarity 6290 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6291 if isinstance(this, exp.AggFunc): 6292 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6293 6294 if ignore_respect and ignore_respect is not this: 6295 ignore_respect.replace(ignore_respect.this) 6296 this = self.expression(ignore_respect.__class__, this=this) 6297 6298 this = self._parse_respect_or_ignore_nulls(this) 6299 6300 # bigquery select from window x AS (partition by ...) 
        # --- tail of _parse_window (method header is above this chunk) ---
        # Decide whether we're finishing "expr AS alias" or "expr OVER ...".
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        # A bare OVER with no parentheses references a named window, e.g. OVER w
        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Window frame: ROWS|RANGE [BETWEEN] <bound> [AND <bound>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY sections of an OVER clause."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound, e.g. UNBOUNDED PRECEDING or CURRENT ROW."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            # PRECEDING / FOLLOWING, when present
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Attach a trailing alias (or alias list) to `this`, if one follows.

        When `explicit` is True, only an alias introduced by AS is accepted.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        # Parenthesized alias list: t AS (a, b, c)
        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an exp.Identifier."""
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A string literal used where an identifier is expected, e.g. AS 'alias'
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a keyword/variable-like token into an exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume the next token unless it is reserved (or unconditionally when
        # ignore_reserved is set); returns the consumed token or None.
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Placeholder parser declined: put the consumed token back
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star modifier such as EXCEPT (...), REPLACE (...) or RENAME (...)."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` over `parse_method` operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; parens are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] TRANSACTION|WORK with optional mode lists."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A mode can be multiple VAR tokens, e.g. READ ONLY
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT name] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop |
    exp.Command]:
        # Reuse the generic DROP parser and default its kind to COLUMN
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the payload of ALTER TABLE ... ADD (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> <action>."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallback: [SET DATA] TYPE <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse Redshift's ALTER DISTSTYLE ALL|EVEN|AUTO|KEY DISTKEY <col>."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse Redshift's ALTER [COMPOUND] SORTKEY (...) | AUTO | NONE."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the payload of ALTER TABLE ... DROP (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME [COLUMN old TO new | TO new_table]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific ALTER TABLE ... SET forms."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement, falling back to an opaque Command on failure."""
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only accept the parse if every token was consumed
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON condition WHEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source=False for BY TARGET, True for BY SOURCE, None otherwise
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single SET item of the form `name = value` or `name TO value`."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET/UNSET; falls back to a Command if tokens are left over."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option from `options` into an exp.Var."""
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # for/else: ran out of continuations without matching one
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement and wrap it as an opaque exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # The first token becomes the command name; everything else is the payload
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse dictionary property: NAME(KIND(key value, ...))."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a ClickHouse dictionary range: (MIN x MAX y) or (MAX y)."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            # MIN defaults to 0 when only MAX is specified
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse a list comprehension tail: <expr> FOR <col> IN <iter> [IF <cond>]."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Also give back the FOR token consumed before this call
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string, e.g. $tag$...$tag$ or $$...$$."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the opening tag sequence repeats
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a multi-word parser via a trie over upcoming tokens."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Match a single token type; optionally attach pending comments to `expression`.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Match when the current token type is in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types atomically.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Match when the current token's upper-cased text is in `texts`
        # (string literals never match as keywords).
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Match an exact sequence of keyword texts; all-or-nothing.
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite lambda-parameter columns in `node`, casting typed parameters."""
        if not node:
            return node

        # Maps parameter name -> cast target (or False when untyped)
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # while/else: column was not nested under a Dot chain
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE] with its Postgres/ClickHouse options."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by WITH <operator>."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse an options block of the form [=] (opt [,] opt ...)."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the option list of a COPY statement."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options
def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7331 expr = self.expression(exp.Credentials) 7332 7333 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7334 expr.set("storage", self._parse_field()) 7335 if self._match_text_seq("CREDENTIALS"): 7336 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7337 creds = ( 7338 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7339 ) 7340 expr.set("credentials", creds) 7341 if self._match_text_seq("ENCRYPTION"): 7342 expr.set("encryption", self._parse_wrapped_options()) 7343 if self._match_text_seq("IAM_ROLE"): 7344 expr.set("iam_role", self._parse_field()) 7345 if self._match_text_seq("REGION"): 7346 expr.set("region", self._parse_field()) 7347 7348 return expr 7349 7350 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7351 return self._parse_field() 7352 7353 def _parse_copy(self) -> exp.Copy | exp.Command: 7354 start = self._prev 7355 7356 self._match(TokenType.INTO) 7357 7358 this = ( 7359 self._parse_select(nested=True, parse_subquery_alias=False) 7360 if self._match(TokenType.L_PAREN, advance=False) 7361 else self._parse_table(schema=True) 7362 ) 7363 7364 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7365 7366 files = self._parse_csv(self._parse_file_location) 7367 credentials = self._parse_credentials() 7368 7369 self._match_text_seq("WITH") 7370 7371 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7372 7373 # Fallback case 7374 if self._curr: 7375 return self._parse_as_command(start) 7376 7377 return self.expression( 7378 exp.Copy, 7379 this=this, 7380 kind=kind, 7381 credentials=credentials, 7382 files=files, 7383 params=params, 7384 ) 7385 7386 def _parse_normalize(self) -> exp.Normalize: 7387 return self.expression( 7388 exp.Normalize, 7389 this=self._parse_bitwise(), 7390 form=self._match(TokenType.COMMA) and self._parse_var(), 7391 ) 7392 7393 def _parse_star_ops(self) -> 
exp.Star | exp.UnpackColumns: 7394 if self._match_text_seq("COLUMNS", "(", advance=False): 7395 return exp.UnpackColumns(this=self._parse_function()) 7396 7397 return self.expression( 7398 exp.Star, 7399 **{ # type: ignore 7400 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7401 "replace": self._parse_star_op("REPLACE"), 7402 "rename": self._parse_star_op("RENAME"), 7403 }, 7404 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # NOTE(review): local import - presumably avoids an import cycle with
        # sqlglot.dialects (the module-level import above is TYPE_CHECKING-only).
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        # Raises if the dialect name/instance cannot be resolved
        self.dialect = Dialect.get_or_raise(dialect)
        # Initializes the per-parse mutable state (tokens, position, errors)
        self.reset()
1377 def parse( 1378 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1379 ) -> t.List[t.Optional[exp.Expression]]: 1380 """ 1381 Parses a list of tokens and returns a list of syntax trees, one tree 1382 per parsed SQL statement. 1383 1384 Args: 1385 raw_tokens: The list of tokens. 1386 sql: The original SQL string, used to produce helpful debug messages. 1387 1388 Returns: 1389 The list of the produced syntax trees. 1390 """ 1391 return self._parse( 1392 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1393 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1395 def parse_into( 1396 self, 1397 expression_types: exp.IntoType, 1398 raw_tokens: t.List[Token], 1399 sql: t.Optional[str] = None, 1400 ) -> t.List[t.Optional[exp.Expression]]: 1401 """ 1402 Parses a list of tokens into a given Expression type. If a collection of Expression 1403 types is given instead, this method will try to parse the token list into each one 1404 of them, stopping at the first for which the parsing succeeds. 1405 1406 Args: 1407 expression_types: The expression type(s) to try and parse the token list into. 1408 raw_tokens: The list of tokens. 1409 sql: The original SQL string, used to produce helpful debug messages. 1410 1411 Returns: 1412 The target Expression. 1413 """ 1414 errors = [] 1415 for expression_type in ensure_list(expression_types): 1416 parser = self.EXPRESSION_PARSERS.get(expression_type) 1417 if not parser: 1418 raise TypeError(f"No parser registered for {expression_type}") 1419 1420 try: 1421 return self._parse(parser, raw_tokens, sql) 1422 except ParseError as e: 1423 e.errors[0]["into_expression"] = expression_type 1424 errors.append(e) 1425 1426 raise ParseError( 1427 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1428 errors=merge_errors(errors), 1429 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1469 def check_errors(self) -> None: 1470 """Logs or raises any found errors, depending on the chosen error level setting.""" 1471 if self.error_level == ErrorLevel.WARN: 1472 for error in self.errors: 1473 logger.error(str(error)) 1474 elif self.error_level == ErrorLevel.RAISE and self.errors: 1475 raise ParseError( 1476 concat_messages(self.errors, self.max_errors), 1477 errors=merge_errors(self.errors), 1478 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: The error description.
            token: The token to anchor the error location to; defaults to the
                current token, then the previous one, then an empty token.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m / \033[0m are ANSI codes that underline the offending slice
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1508 def expression( 1509 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1510 ) -> E: 1511 """ 1512 Creates a new, validated Expression. 1513 1514 Args: 1515 exp_class: The expression class to instantiate. 1516 comments: An optional list of comments to attach to the expression. 1517 kwargs: The arguments to set for the expression along with their respective values. 1518 1519 Returns: 1520 The target expression. 1521 """ 1522 instance = exp_class(**kwargs) 1523 instance.add_comments(comments) if comments else self._add_comments(instance) 1524 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1531 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1532 """ 1533 Validates an Expression, making sure that all its mandatory arguments are set. 1534 1535 Args: 1536 expression: The expression to validate. 1537 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1538 1539 Returns: 1540 The validated expression. 1541 """ 1542 if self.error_level != ErrorLevel.IGNORE: 1543 for error_message in expression.error_messages(args): 1544 self.raise_error(error_message) 1545 1546 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.