sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
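

# Illustrative sketch (not part of the original module): the Paren wrapping in
# build_mod preserves precedence once MOD is rendered with the % operator.
# Assuming the public top-level API:
#
#     import sqlglot
#
#     sqlglot.parse_one("MOD(a + 1, 7)").sql()  # '(a + 1) % 7'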


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
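
    # Illustrative usage sketch (assuming these imports resolve in a standard
    # sqlglot installation):
    #
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     sql = "SELECT a FROM b"
    #     expressions = Parser().parse(Tokenizer().tokenize(sql), sql)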

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "MOD": build_mod,
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }
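
    # Illustrative note: NO_PAREN_FUNCTIONS above lets these keywords parse as
    # function expressions without parentheses, e.g. (assuming `sqlglot.parse_one`):
    #
    #     parse_one("SELECT CURRENT_DATE").find(exp.CurrentDate)  # -> exp.CurrentDate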

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
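
    # Illustrative note: the operator tables above (EQUALITY through EXPONENT) drive
    # binary-operator parsing, so standard arithmetic precedence falls out of the
    # parse, e.g. (assuming `sqlglot.parse_one`):
    #
    #     parse_one("1 + 2 * 3")  # Add(this=Literal(1), expression=Mul(...))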

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
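
    # Illustrative sketch (assuming `sqlglot.parse_one`): RANGE_PARSERS produces
    # dedicated nodes for range-style predicates, e.g.
    #
    #     parse_one("x BETWEEN 1 AND 2")  # exp.Between
    #     parse_one("x LIKE 'a%'")        # exp.Like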
820 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 821 "BACKUP": lambda self: self.expression( 822 exp.BackupProperty, this=self._parse_var(any_token=True) 823 ), 824 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 825 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 826 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 827 "CHECKSUM": lambda self: self._parse_checksum(), 828 "CLUSTER BY": lambda self: self._parse_cluster(), 829 "CLUSTERED": lambda self: self._parse_clustered_by(), 830 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 831 exp.CollateProperty, **kwargs 832 ), 833 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 834 "CONTAINS": lambda self: self._parse_contains_property(), 835 "COPY": lambda self: self._parse_copy_property(), 836 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 837 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 838 "DEFINER": lambda self: self._parse_definer(), 839 "DETERMINISTIC": lambda self: self.expression( 840 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 841 ), 842 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 843 "DISTKEY": lambda self: self._parse_distkey(), 844 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 845 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 846 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 847 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 848 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 849 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 850 "FREESPACE": lambda self: self._parse_freespace(), 851 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 852 "HEAP": lambda self: self.expression(exp.HeapProperty), 853 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 854 "IMMUTABLE": lambda self: self.expression( 855 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 856 ), 857 "INHERITS": lambda self: self.expression( 858 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 859 ), 860 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 861 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 862 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 863 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 864 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 865 "LIKE": lambda self: self._parse_create_like(), 866 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 867 "LOCK": lambda self: self._parse_locking(), 868 "LOCKING": lambda self: self._parse_locking(), 869 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 870 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 871 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 872 "MODIFIES": lambda self: self._parse_modifies_property(), 873 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 874 "NO": lambda self: self._parse_no_property(), 875 "ON": lambda self: self._parse_on_property(), 876 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 877 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, 
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint,
            this=self._parse_wrapped_csv(self._parse_ordered),
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
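
    # Illustrative sketch (assuming `sqlglot.parse_one`): CONSTRAINT_PARSERS is keyed
    # by the leading constraint keyword, e.g.
    #
    #     parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 0)")
    #     # -> exp.Create containing NotNullColumnConstraint and DefaultColumnConstraint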

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
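
    # Illustrative sketch (assuming `sqlglot.parse_one`): FUNCTION_PARSERS covers
    # functions whose call syntax is not a plain argument list, e.g.
    #
    #     parse_one("EXTRACT(YEAR FROM x)")  # exp.Extract, not a generic function node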
TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1103 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1104 "ISOLATION": ( 1105 ("LEVEL", "REPEATABLE", "READ"), 1106 ("LEVEL", "READ", "COMMITTED"), 1107 ("LEVEL", "READ", "UNCOMITTED"), 1108 ("LEVEL", "SERIALIZABLE"), 1109 ), 1110 "READ": ("WRITE", "ONLY"), 1111 } 1112 1113 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1114 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1115 ) 1116 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1117 1118 CREATE_SEQUENCE: OPTIONS_TYPE = { 1119 "SCALE": ("EXTEND", "NOEXTEND"), 1120 "SHARD": ("EXTEND", "NOEXTEND"), 1121 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1122 **dict.fromkeys( 1123 ( 1124 "SESSION", 1125 "GLOBAL", 1126 "KEEP", 1127 "NOKEEP", 1128 "ORDER", 1129 "NOORDER", 1130 "NOCACHE", 1131 "CYCLE", 1132 "NOCYCLE", 1133 "NOMINVALUE", 1134 "NOMAXVALUE", 1135 "NOSCALE", 1136 "NOSHARD", 1137 ), 1138 tuple(), 1139 ), 1140 } 1141 1142 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1143 1144 USABLES: OPTIONS_TYPE = dict.fromkeys( 1145 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1146 ) 1147 1148 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1149 1150 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1151 "TYPE": ("EVOLUTION",), 1152 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1153 } 1154 1155 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1156 "NOT": ("ENFORCED",), 1157 "MATCH": ( 1158 "FULL", 1159 "PARTIAL", 1160 "SIMPLE", 1161 ), 1162 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1163 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1164 } 1165 1166 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1167 1168 CLONE_KEYWORDS = {"CLONE", "COPY"} 1169 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1170 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1171 1172 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1173 1174 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1175 1176 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1177 1178 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1179 1180 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1181 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1182 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1183 1184 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1185 1186 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1187 1188 ADD_CONSTRAINT_TOKENS = { 1189 TokenType.CONSTRAINT, 1190 TokenType.FOREIGN_KEY, 1191 TokenType.INDEX, 1192 TokenType.KEY, 1193 TokenType.PRIMARY_KEY, 1194 TokenType.UNIQUE, 1195 } 1196 1197 DISTINCT_TOKENS = {TokenType.DISTINCT} 1198 1199 NULL_TOKENS = {TokenType.NULL} 1200 1201 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1202 1203 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1204 1205 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1206 1207 STRICT_CAST = True 1208 1209 PREFIXED_PIVOT_COLUMNS = False 1210 IDENTIFY_PIVOT_STRINGS = False 1211 1212 LOG_DEFAULTS_TO_LN = False 1213 1214 # Whether ADD is present for each column added by ALTER TABLE 1215 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1216 1217 # Whether the table sample clause expects CSV syntax 1218 TABLESAMPLE_CSV = False 1219 1220 # The default method used for table sampling 1221 

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
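
    # Illustrative sketch (assuming the module-level Tokenizer import above): `parse`
    # splits the token stream on semicolons and returns one tree per statement, e.g.
    #
    #     tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
    #     Parser().parse(tokens)  # [Select(...), Select(...)]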

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
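
    # Illustrative sketch (hypothetical input): with the default ErrorLevel.IMMEDIATE,
    # raise_error raises at the first problem, and the ParseError carries the recorded
    # context, e.g.
    #
    #     try:
    #         Parser().parse(Tokenizer().tokenize("SELECT 1 +"), "SELECT 1 +")
    #     except ParseError as e:
    #         e.errors[0]["line"], e.errors[0]["col"]  # location of the offending token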

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(self) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
self._match(TokenType.BEGIN) 1736 return_ = self._match_text_seq("RETURN") 1737 1738 if self._match(TokenType.STRING, advance=False): 1739 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1740 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1741 expression = self._parse_string() 1742 extend_props(self._parse_properties()) 1743 else: 1744 expression = self._parse_statement() 1745 1746 end = self._match_text_seq("END") 1747 1748 if return_: 1749 expression = self.expression(exp.Return, this=expression) 1750 elif create_token.token_type == TokenType.INDEX: 1751 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1752 if not self._match(TokenType.ON): 1753 index = self._parse_id_var() 1754 anonymous = False 1755 else: 1756 index = None 1757 anonymous = True 1758 1759 this = self._parse_index(index=index, anonymous=anonymous) 1760 elif create_token.token_type in self.DB_CREATABLES: 1761 table_parts = self._parse_table_parts( 1762 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1763 ) 1764 1765 # exp.Properties.Location.POST_NAME 1766 self._match(TokenType.COMMA) 1767 extend_props(self._parse_properties(before=True)) 1768 1769 this = self._parse_schema(this=table_parts) 1770 1771 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1772 extend_props(self._parse_properties()) 1773 1774 self._match(TokenType.ALIAS) 1775 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1776 # exp.Properties.Location.POST_ALIAS 1777 extend_props(self._parse_properties()) 1778 1779 if create_token.token_type == TokenType.SEQUENCE: 1780 expression = self._parse_types() 1781 extend_props(self._parse_properties()) 1782 else: 1783 expression = self._parse_ddl_select() 1784 1785 if create_token.token_type == TokenType.TABLE: 1786 # exp.Properties.Location.POST_EXPRESSION 1787 extend_props(self._parse_properties()) 1788 1789 indexes = [] 1790 while True: 1791 index = self._parse_index() 1792 1793 # exp.Properties.Location.POST_INDEX 1794 extend_props(self._parse_properties()) 1795 1796 if not index: 1797 break 1798 else: 1799 self._match(TokenType.COMMA) 1800 indexes.append(index) 1801 elif create_token.token_type == TokenType.VIEW: 1802 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1803 no_schema_binding = True 1804 1805 shallow = self._match_text_seq("SHALLOW") 1806 1807 if self._match_texts(self.CLONE_KEYWORDS): 1808 copy = self._prev.text.lower() == "copy" 1809 clone = self.expression( 1810 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1811 ) 1812 1813 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1814 return self._parse_as_command(start) 1815 1816 return self.expression( 1817 exp.Create, 1818 comments=comments, 1819 this=this, 1820 kind=create_token.text.upper(), 1821 replace=replace, 1822 unique=unique, 1823 expression=expression, 1824 exists=exists, 1825 properties=properties, 1826 indexes=indexes, 1827 no_schema_binding=no_schema_binding, 1828 begin=begin, 1829 end=end, 1830 clone=clone, 1831 concurrently=concurrently, 1832 clustered=clustered, 1833 ) 1834 1835 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1836 seq = exp.SequenceProperties() 1837 1838 options = [] 1839 index = self._index 1840 1841 while self._curr: 1842 self._match(TokenType.COMMA) 1843 if self._match_text_seq("INCREMENT"): 1844 self._match_text_seq("BY") 1845 
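# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for _parse_sequence_properties, in progress here: its loop
# collects options such as INCREMENT BY and START WITH into a single node.
# Assumes a standard sqlglot install; exact argument layout may vary by version.
import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("CREATE SEQUENCE s INCREMENT BY 2 START WITH 10")
props = ast.find(exp.SequenceProperties)
assert props is not None                # increment=2, start=10 captured here
# --- end annotation -----------------------------------------------------------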
self._match_text_seq("=") 1846 seq.set("increment", self._parse_term()) 1847 elif self._match_text_seq("MINVALUE"): 1848 seq.set("minvalue", self._parse_term()) 1849 elif self._match_text_seq("MAXVALUE"): 1850 seq.set("maxvalue", self._parse_term()) 1851 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1852 self._match_text_seq("=") 1853 seq.set("start", self._parse_term()) 1854 elif self._match_text_seq("CACHE"): 1855 # T-SQL allows empty CACHE which is initialized dynamically 1856 seq.set("cache", self._parse_number() or True) 1857 elif self._match_text_seq("OWNED", "BY"): 1858 # "OWNED BY NONE" is the default 1859 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1860 else: 1861 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1862 if opt: 1863 options.append(opt) 1864 else: 1865 break 1866 1867 seq.set("options", options if options else None) 1868 return None if self._index == index else seq 1869 1870 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1871 # only used for teradata currently 1872 self._match(TokenType.COMMA) 1873 1874 kwargs = { 1875 "no": self._match_text_seq("NO"), 1876 "dual": self._match_text_seq("DUAL"), 1877 "before": self._match_text_seq("BEFORE"), 1878 "default": self._match_text_seq("DEFAULT"), 1879 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1880 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1881 "after": self._match_text_seq("AFTER"), 1882 "minimum": self._match_texts(("MIN", "MINIMUM")), 1883 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1884 } 1885 1886 if self._match_texts(self.PROPERTY_PARSERS): 1887 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1888 try: 1889 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1890 except TypeError: 1891 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1892 1893 return None 1894 1895 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1896 return self._parse_wrapped_csv(self._parse_property) 1897 1898 def _parse_property(self) -> t.Optional[exp.Expression]: 1899 if self._match_texts(self.PROPERTY_PARSERS): 1900 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1901 1902 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1903 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1904 1905 if self._match_text_seq("COMPOUND", "SORTKEY"): 1906 return self._parse_sortkey(compound=True) 1907 1908 if self._match_text_seq("SQL", "SECURITY"): 1909 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1910 1911 index = self._index 1912 key = self._parse_column() 1913 1914 if not self._match(TokenType.EQ): 1915 self._retreat(index) 1916 return self._parse_sequence_properties() 1917 1918 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1919 if isinstance(key, exp.Column): 1920 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1921 1922 value = self._parse_bitwise() or self._parse_var(any_token=True) 1923 1924 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1925 if isinstance(value, exp.Column): 1926 value = exp.var(value.name) 1927 1928 return self.expression(exp.Property, this=key, value=value) 1929 1930 def _parse_stored(self) -> exp.FileFormatProperty: 1931 self._match(TokenType.ALIAS) 1932 1933 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") 
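# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for _parse_stored, which begins just above: Hive's STORED AS
# clause becomes a FileFormatProperty; INPUTFORMAT/OUTPUTFORMAT string pairs
# would instead yield an InputOutputFormat. Assumes a standard sqlglot install.
import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("CREATE TABLE t (c INT) STORED AS PARQUET", read="hive")
prop = ast.find(exp.FileFormatProperty)
assert prop is not None                 # this=PARQUET
# --- end annotation -----------------------------------------------------------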
else None 1934 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1935 1936 return self.expression( 1937 exp.FileFormatProperty, 1938 this=( 1939 self.expression( 1940 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1941 ) 1942 if input_format or output_format 1943 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1944 ), 1945 ) 1946 1947 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1948 field = self._parse_field() 1949 if isinstance(field, exp.Identifier) and not field.quoted: 1950 field = exp.var(field) 1951 1952 return field 1953 1954 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1955 self._match(TokenType.EQ) 1956 self._match(TokenType.ALIAS) 1957 1958 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1959 1960 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1961 properties = [] 1962 while True: 1963 if before: 1964 prop = self._parse_property_before() 1965 else: 1966 prop = self._parse_property() 1967 if not prop: 1968 break 1969 for p in ensure_list(prop): 1970 properties.append(p) 1971 1972 if properties: 1973 return self.expression(exp.Properties, expressions=properties) 1974 1975 return None 1976 1977 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1978 return self.expression( 1979 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1980 ) 1981 1982 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1983 if self._index >= 2: 1984 pre_volatile_token = self._tokens[self._index - 2] 1985 else: 1986 pre_volatile_token = None 1987 1988 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1989 return exp.VolatileProperty() 1990 1991 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1992 1993 def _parse_retention_period(self) -> exp.Var: 1994 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1995 number = self._parse_number() 1996 number_str = f"{number} " if number else "" 1997 unit = self._parse_var(any_token=True) 1998 return exp.var(f"{number_str}{unit}") 1999 2000 def _parse_system_versioning_property( 2001 self, with_: bool = False 2002 ) -> exp.WithSystemVersioningProperty: 2003 self._match(TokenType.EQ) 2004 prop = self.expression( 2005 exp.WithSystemVersioningProperty, 2006 **{ # type: ignore 2007 "on": True, 2008 "with": with_, 2009 }, 2010 ) 2011 2012 if self._match_text_seq("OFF"): 2013 prop.set("on", False) 2014 return prop 2015 2016 self._match(TokenType.ON) 2017 if self._match(TokenType.L_PAREN): 2018 while self._curr and not self._match(TokenType.R_PAREN): 2019 if self._match_text_seq("HISTORY_TABLE", "="): 2020 prop.set("this", self._parse_table_parts()) 2021 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2022 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2023 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2024 prop.set("retention_period", self._parse_retention_period()) 2025 2026 self._match(TokenType.COMMA) 2027 2028 return prop 2029 2030 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2031 self._match(TokenType.EQ) 2032 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2033 prop = self.expression(exp.DataDeletionProperty, on=on) 2034 2035 if self._match(TokenType.L_PAREN): 2036 
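# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for _parse_system_versioning_property above, assuming the
# T-SQL dialect wires it up as this code suggests; the DDL and the history
# table name dbo.t_hist below are purely illustrative.
import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "CREATE TABLE t (a INT) WITH (SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.t_hist))",
    read="tsql",
)
prop = ast.find(exp.WithSystemVersioningProperty)
assert prop is not None                 # on=True, this=dbo.t_hist
# --- end annotation -----------------------------------------------------------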
while self._curr and not self._match(TokenType.R_PAREN): 2037 if self._match_text_seq("FILTER_COLUMN", "="): 2038 prop.set("filter_column", self._parse_column()) 2039 elif self._match_text_seq("RETENTION_PERIOD", "="): 2040 prop.set("retention_period", self._parse_retention_period()) 2041 2042 self._match(TokenType.COMMA) 2043 2044 return prop 2045 2046 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2047 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2048 prop = self._parse_system_versioning_property(with_=True) 2049 self._match_r_paren() 2050 return prop 2051 2052 if self._match(TokenType.L_PAREN, advance=False): 2053 return self._parse_wrapped_properties() 2054 2055 if self._match_text_seq("JOURNAL"): 2056 return self._parse_withjournaltable() 2057 2058 if self._match_texts(self.VIEW_ATTRIBUTES): 2059 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2060 2061 if self._match_text_seq("DATA"): 2062 return self._parse_withdata(no=False) 2063 elif self._match_text_seq("NO", "DATA"): 2064 return self._parse_withdata(no=True) 2065 2066 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2067 return self._parse_serde_properties(with_=True) 2068 2069 if self._match(TokenType.SCHEMA): 2070 return self.expression( 2071 exp.WithSchemaBindingProperty, 2072 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2073 ) 2074 2075 if not self._next: 2076 return None 2077 2078 return self._parse_withisolatedloading() 2079 2080 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2081 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2082 self._match(TokenType.EQ) 2083 2084 user = self._parse_id_var() 2085 self._match(TokenType.PARAMETER) 2086 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2087 2088 if not user or not host: 2089 return None 2090 2091 return exp.DefinerProperty(this=f"{user}@{host}") 2092 2093 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2094 self._match(TokenType.TABLE) 2095 self._match(TokenType.EQ) 2096 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2097 2098 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2099 return self.expression(exp.LogProperty, no=no) 2100 2101 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2102 return self.expression(exp.JournalProperty, **kwargs) 2103 2104 def _parse_checksum(self) -> exp.ChecksumProperty: 2105 self._match(TokenType.EQ) 2106 2107 on = None 2108 if self._match(TokenType.ON): 2109 on = True 2110 elif self._match_text_seq("OFF"): 2111 on = False 2112 2113 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2114 2115 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2116 return self.expression( 2117 exp.Cluster, 2118 expressions=( 2119 self._parse_wrapped_csv(self._parse_ordered) 2120 if wrapped 2121 else self._parse_csv(self._parse_ordered) 2122 ), 2123 ) 2124 2125 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2126 self._match_text_seq("BY") 2127 2128 self._match_l_paren() 2129 expressions = self._parse_csv(self._parse_column) 2130 self._match_r_paren() 2131 2132 if self._match_text_seq("SORTED", "BY"): 2133 self._match_l_paren() 2134 sorted_by = self._parse_csv(self._parse_ordered) 2135 self._match_r_paren() 2136 else: 2137 sorted_by = None 2138 2139 self._match(TokenType.INTO) 2140 buckets = self._parse_number() 2141 self._match_text_seq("BUCKETS") 2142 2143 
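# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for _parse_clustered_by, whose return statement follows
# below: Hive-style bucketing DDL parses into a ClusteredByProperty. Assumes
# a standard sqlglot install.
import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "CREATE TABLE t (a INT, b STRING) CLUSTERED BY (a) SORTED BY (b) INTO 8 BUCKETS",
    read="hive",
)
prop = ast.find(exp.ClusteredByProperty)
assert prop is not None                 # expressions=[a], sorted_by=[b], buckets=8
# --- end annotation -----------------------------------------------------------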
return self.expression( 2144 exp.ClusteredByProperty, 2145 expressions=expressions, 2146 sorted_by=sorted_by, 2147 buckets=buckets, 2148 ) 2149 2150 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2151 if not self._match_text_seq("GRANTS"): 2152 self._retreat(self._index - 1) 2153 return None 2154 2155 return self.expression(exp.CopyGrantsProperty) 2156 2157 def _parse_freespace(self) -> exp.FreespaceProperty: 2158 self._match(TokenType.EQ) 2159 return self.expression( 2160 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2161 ) 2162 2163 def _parse_mergeblockratio( 2164 self, no: bool = False, default: bool = False 2165 ) -> exp.MergeBlockRatioProperty: 2166 if self._match(TokenType.EQ): 2167 return self.expression( 2168 exp.MergeBlockRatioProperty, 2169 this=self._parse_number(), 2170 percent=self._match(TokenType.PERCENT), 2171 ) 2172 2173 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2174 2175 def _parse_datablocksize( 2176 self, 2177 default: t.Optional[bool] = None, 2178 minimum: t.Optional[bool] = None, 2179 maximum: t.Optional[bool] = None, 2180 ) -> exp.DataBlocksizeProperty: 2181 self._match(TokenType.EQ) 2182 size = self._parse_number() 2183 2184 units = None 2185 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2186 units = self._prev.text 2187 2188 return self.expression( 2189 exp.DataBlocksizeProperty, 2190 size=size, 2191 units=units, 2192 default=default, 2193 minimum=minimum, 2194 maximum=maximum, 2195 ) 2196 2197 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2198 self._match(TokenType.EQ) 2199 always = self._match_text_seq("ALWAYS") 2200 manual = self._match_text_seq("MANUAL") 2201 never = self._match_text_seq("NEVER") 2202 default = self._match_text_seq("DEFAULT") 2203 2204 autotemp = None 2205 if self._match_text_seq("AUTOTEMP"): 2206 autotemp = self._parse_schema() 2207 2208 return self.expression( 2209 exp.BlockCompressionProperty, 2210 always=always, 2211 manual=manual, 2212 never=never, 2213 default=default, 2214 autotemp=autotemp, 2215 ) 2216 2217 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2218 index = self._index 2219 no = self._match_text_seq("NO") 2220 concurrent = self._match_text_seq("CONCURRENT") 2221 2222 if not self._match_text_seq("ISOLATED", "LOADING"): 2223 self._retreat(index) 2224 return None 2225 2226 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2227 return self.expression( 2228 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2229 ) 2230 2231 def _parse_locking(self) -> exp.LockingProperty: 2232 if self._match(TokenType.TABLE): 2233 kind = "TABLE" 2234 elif self._match(TokenType.VIEW): 2235 kind = "VIEW" 2236 elif self._match(TokenType.ROW): 2237 kind = "ROW" 2238 elif self._match_text_seq("DATABASE"): 2239 kind = "DATABASE" 2240 else: 2241 kind = None 2242 2243 if kind in ("DATABASE", "TABLE", "VIEW"): 2244 this = self._parse_table_parts() 2245 else: 2246 this = None 2247 2248 if self._match(TokenType.FOR): 2249 for_or_in = "FOR" 2250 elif self._match(TokenType.IN): 2251 for_or_in = "IN" 2252 else: 2253 for_or_in = None 2254 2255 if self._match_text_seq("ACCESS"): 2256 lock_type = "ACCESS" 2257 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2258 lock_type = "EXCLUSIVE" 2259 elif self._match_text_seq("SHARE"): 2260 lock_type = "SHARE" 2261 elif self._match_text_seq("READ"): 2262 lock_type = "READ" 2263 elif 
self._match_text_seq("WRITE"): 2264 lock_type = "WRITE" 2265 elif self._match_text_seq("CHECKSUM"): 2266 lock_type = "CHECKSUM" 2267 else: 2268 lock_type = None 2269 2270 override = self._match_text_seq("OVERRIDE") 2271 2272 return self.expression( 2273 exp.LockingProperty, 2274 this=this, 2275 kind=kind, 2276 for_or_in=for_or_in, 2277 lock_type=lock_type, 2278 override=override, 2279 ) 2280 2281 def _parse_partition_by(self) -> t.List[exp.Expression]: 2282 if self._match(TokenType.PARTITION_BY): 2283 return self._parse_csv(self._parse_assignment) 2284 return [] 2285 2286 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2287 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2288 if self._match_text_seq("MINVALUE"): 2289 return exp.var("MINVALUE") 2290 if self._match_text_seq("MAXVALUE"): 2291 return exp.var("MAXVALUE") 2292 return self._parse_bitwise() 2293 2294 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2295 expression = None 2296 from_expressions = None 2297 to_expressions = None 2298 2299 if self._match(TokenType.IN): 2300 this = self._parse_wrapped_csv(self._parse_bitwise) 2301 elif self._match(TokenType.FROM): 2302 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2303 self._match_text_seq("TO") 2304 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2305 elif self._match_text_seq("WITH", "(", "MODULUS"): 2306 this = self._parse_number() 2307 self._match_text_seq(",", "REMAINDER") 2308 expression = self._parse_number() 2309 self._match_r_paren() 2310 else: 2311 self.raise_error("Failed to parse partition bound spec.") 2312 2313 return self.expression( 2314 exp.PartitionBoundSpec, 2315 this=this, 2316 expression=expression, 2317 from_expressions=from_expressions, 2318 to_expressions=to_expressions, 2319 ) 2320 2321 # https://www.postgresql.org/docs/current/sql-createtable.html 2322 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2323 if not self._match_text_seq("OF"): 2324 self._retreat(self._index - 1) 2325 return None 2326 2327 this = self._parse_table(schema=True) 2328 2329 if self._match(TokenType.DEFAULT): 2330 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2331 elif self._match_text_seq("FOR", "VALUES"): 2332 expression = self._parse_partition_bound_spec() 2333 else: 2334 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2335 2336 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2337 2338 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2339 self._match(TokenType.EQ) 2340 return self.expression( 2341 exp.PartitionedByProperty, 2342 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2343 ) 2344 2345 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2346 if self._match_text_seq("AND", "STATISTICS"): 2347 statistics = True 2348 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2349 statistics = False 2350 else: 2351 statistics = None 2352 2353 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2354 2355 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2356 if self._match_text_seq("SQL"): 2357 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2358 return None 2359 2360 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2361 if self._match_text_seq("SQL", "DATA"): 2362 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2363 
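# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for _parse_partitioned_of/_parse_partition_bound_spec above,
# assuming the Postgres dialect routes PARTITION OF through them as this code
# suggests; table names below are illustrative.
import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "CREATE TABLE t1 PARTITION OF t FOR VALUES FROM (1) TO (10)", read="postgres"
)
prop = ast.find(exp.PartitionedOfProperty)
assert prop is not None                 # expression is a PartitionBoundSpec
# --- end annotation -----------------------------------------------------------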
return None 2364 2365 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2366 if self._match_text_seq("PRIMARY", "INDEX"): 2367 return exp.NoPrimaryIndexProperty() 2368 if self._match_text_seq("SQL"): 2369 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2370 return None 2371 2372 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2373 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2374 return exp.OnCommitProperty() 2375 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2376 return exp.OnCommitProperty(delete=True) 2377 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2378 2379 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2380 if self._match_text_seq("SQL", "DATA"): 2381 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2382 return None 2383 2384 def _parse_distkey(self) -> exp.DistKeyProperty: 2385 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2386 2387 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2388 table = self._parse_table(schema=True) 2389 2390 options = [] 2391 while self._match_texts(("INCLUDING", "EXCLUDING")): 2392 this = self._prev.text.upper() 2393 2394 id_var = self._parse_id_var() 2395 if not id_var: 2396 return None 2397 2398 options.append( 2399 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2400 ) 2401 2402 return self.expression(exp.LikeProperty, this=table, expressions=options) 2403 2404 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2405 return self.expression( 2406 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2407 ) 2408 2409 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2410 self._match(TokenType.EQ) 2411 return self.expression( 2412 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2413 ) 2414 2415 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2416 self._match_text_seq("WITH", "CONNECTION") 2417 return self.expression( 2418 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2419 ) 2420 2421 def _parse_returns(self) -> exp.ReturnsProperty: 2422 value: t.Optional[exp.Expression] 2423 null = None 2424 is_table = self._match(TokenType.TABLE) 2425 2426 if is_table: 2427 if self._match(TokenType.LT): 2428 value = self.expression( 2429 exp.Schema, 2430 this="TABLE", 2431 expressions=self._parse_csv(self._parse_struct_types), 2432 ) 2433 if not self._match(TokenType.GT): 2434 self.raise_error("Expecting >") 2435 else: 2436 value = self._parse_schema(exp.var("TABLE")) 2437 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2438 null = True 2439 value = None 2440 else: 2441 value = self._parse_types() 2442 2443 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2444 2445 def _parse_describe(self) -> exp.Describe: 2446 kind = self._match_set(self.CREATABLES) and self._prev.text 2447 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2448 if self._match(TokenType.DOT): 2449 style = None 2450 self._retreat(self._index - 2) 2451 this = self._parse_table(schema=True) 2452 properties = self._parse_properties() 2453 expressions = properties.expressions if properties else None 2454 return self.expression( 2455 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2456 ) 2457 2458 def 
_parse_insert(self) -> exp.Insert: 2459 comments = ensure_list(self._prev_comments) 2460 hint = self._parse_hint() 2461 overwrite = self._match(TokenType.OVERWRITE) 2462 ignore = self._match(TokenType.IGNORE) 2463 local = self._match_text_seq("LOCAL") 2464 alternative = None 2465 is_function = None 2466 2467 if self._match_text_seq("DIRECTORY"): 2468 this: t.Optional[exp.Expression] = self.expression( 2469 exp.Directory, 2470 this=self._parse_var_or_string(), 2471 local=local, 2472 row_format=self._parse_row_format(match_row=True), 2473 ) 2474 else: 2475 if self._match(TokenType.OR): 2476 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2477 2478 self._match(TokenType.INTO) 2479 comments += ensure_list(self._prev_comments) 2480 self._match(TokenType.TABLE) 2481 is_function = self._match(TokenType.FUNCTION) 2482 2483 this = ( 2484 self._parse_table(schema=True, parse_partition=True) 2485 if not is_function 2486 else self._parse_function() 2487 ) 2488 2489 returning = self._parse_returning() 2490 2491 return self.expression( 2492 exp.Insert, 2493 comments=comments, 2494 hint=hint, 2495 is_function=is_function, 2496 this=this, 2497 stored=self._match_text_seq("STORED") and self._parse_stored(), 2498 by_name=self._match_text_seq("BY", "NAME"), 2499 exists=self._parse_exists(), 2500 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2501 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2502 conflict=self._parse_on_conflict(), 2503 returning=returning or self._parse_returning(), 2504 overwrite=overwrite, 2505 alternative=alternative, 2506 ignore=ignore, 2507 ) 2508 2509 def _parse_kill(self) -> exp.Kill: 2510 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2511 2512 return self.expression( 2513 exp.Kill, 2514 this=self._parse_primary(), 2515 kind=kind, 2516 ) 2517 2518 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2519 conflict = self._match_text_seq("ON", "CONFLICT") 2520 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2521 2522 if not conflict and not duplicate: 2523 return None 2524 2525 conflict_keys = None 2526 constraint = None 2527 2528 if conflict: 2529 if self._match_text_seq("ON", "CONSTRAINT"): 2530 constraint = self._parse_id_var() 2531 elif self._match(TokenType.L_PAREN): 2532 conflict_keys = self._parse_csv(self._parse_id_var) 2533 self._match_r_paren() 2534 2535 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2536 if self._prev.token_type == TokenType.UPDATE: 2537 self._match(TokenType.SET) 2538 expressions = self._parse_csv(self._parse_equality) 2539 else: 2540 expressions = None 2541 2542 return self.expression( 2543 exp.OnConflict, 2544 duplicate=duplicate, 2545 expressions=expressions, 2546 action=action, 2547 conflict_keys=conflict_keys, 2548 constraint=constraint, 2549 ) 2550 2551 def _parse_returning(self) -> t.Optional[exp.Returning]: 2552 if not self._match(TokenType.RETURNING): 2553 return None 2554 return self.expression( 2555 exp.Returning, 2556 expressions=self._parse_csv(self._parse_expression), 2557 into=self._match(TokenType.INTO) and self._parse_table_part(), 2558 ) 2559 2560 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2561 if not self._match(TokenType.FORMAT): 2562 return None 2563 return self._parse_row_format() 2564 2565 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2566 index = self._index 2567 
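# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for _parse_insert and _parse_on_conflict above: the conflict
# clause is attached to the Insert node under the "conflict" arg. Assumes a
# standard sqlglot install.
import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", read="postgres"
)
conflict = ast.args.get("conflict")
assert isinstance(conflict, exp.OnConflict)  # conflict_keys=[a], action=DO NOTHING
# --- end annotation -----------------------------------------------------------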
with_ = with_ or self._match_text_seq("WITH") 2568 2569 if not self._match(TokenType.SERDE_PROPERTIES): 2570 self._retreat(index) 2571 return None 2572 return self.expression( 2573 exp.SerdeProperties, 2574 **{ # type: ignore 2575 "expressions": self._parse_wrapped_properties(), 2576 "with": with_, 2577 }, 2578 ) 2579 2580 def _parse_row_format( 2581 self, match_row: bool = False 2582 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2583 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2584 return None 2585 2586 if self._match_text_seq("SERDE"): 2587 this = self._parse_string() 2588 2589 serde_properties = self._parse_serde_properties() 2590 2591 return self.expression( 2592 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2593 ) 2594 2595 self._match_text_seq("DELIMITED") 2596 2597 kwargs = {} 2598 2599 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2600 kwargs["fields"] = self._parse_string() 2601 if self._match_text_seq("ESCAPED", "BY"): 2602 kwargs["escaped"] = self._parse_string() 2603 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2604 kwargs["collection_items"] = self._parse_string() 2605 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2606 kwargs["map_keys"] = self._parse_string() 2607 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2608 kwargs["lines"] = self._parse_string() 2609 if self._match_text_seq("NULL", "DEFINED", "AS"): 2610 kwargs["null"] = self._parse_string() 2611 2612 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2613 2614 def _parse_load(self) -> exp.LoadData | exp.Command: 2615 if self._match_text_seq("DATA"): 2616 local = self._match_text_seq("LOCAL") 2617 self._match_text_seq("INPATH") 2618 inpath = self._parse_string() 2619 overwrite = self._match(TokenType.OVERWRITE) 2620 self._match_pair(TokenType.INTO, TokenType.TABLE) 2621 2622 return self.expression( 2623 exp.LoadData, 2624 this=self._parse_table(schema=True), 2625 local=local, 2626 overwrite=overwrite, 2627 inpath=inpath, 2628 partition=self._parse_partition(), 2629 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2630 serde=self._match_text_seq("SERDE") and self._parse_string(), 2631 ) 2632 return self._parse_as_command(self._prev) 2633 2634 def _parse_delete(self) -> exp.Delete: 2635 # This handles MySQL's "Multiple-Table Syntax" 2636 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2637 tables = None 2638 comments = self._prev_comments 2639 if not self._match(TokenType.FROM, advance=False): 2640 tables = self._parse_csv(self._parse_table) or None 2641 2642 returning = self._parse_returning() 2643 2644 return self.expression( 2645 exp.Delete, 2646 comments=comments, 2647 tables=tables, 2648 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2649 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2650 where=self._parse_where(), 2651 returning=returning or self._parse_returning(), 2652 limit=self._parse_limit(), 2653 ) 2654 2655 def _parse_update(self) -> exp.Update: 2656 comments = self._prev_comments 2657 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2658 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2659 returning = self._parse_returning() 2660 return self.expression( 2661 exp.Update, 2662 comments=comments, 2663 **{ # type: ignore 2664 "this": this, 2665 "expressions": expressions, 2666 "from": 
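# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for _parse_delete above, which handles MySQL's multiple-table
# syntax: the leading table list lands in args["tables"], before the FROM
# clause is consumed. Assumes a standard sqlglot install.
import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id WHERE t2.x = 1", read="mysql"
)
assert isinstance(ast, exp.Delete)
assert ast.args.get("tables")           # [t1], parsed by _parse_csv(_parse_table)
# --- end annotation -----------------------------------------------------------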
self._parse_from(joins=True), 2667 "where": self._parse_where(), 2668 "returning": returning or self._parse_returning(), 2669 "order": self._parse_order(), 2670 "limit": self._parse_limit(), 2671 }, 2672 ) 2673 2674 def _parse_uncache(self) -> exp.Uncache: 2675 if not self._match(TokenType.TABLE): 2676 self.raise_error("Expecting TABLE after UNCACHE") 2677 2678 return self.expression( 2679 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2680 ) 2681 2682 def _parse_cache(self) -> exp.Cache: 2683 lazy = self._match_text_seq("LAZY") 2684 self._match(TokenType.TABLE) 2685 table = self._parse_table(schema=True) 2686 2687 options = [] 2688 if self._match_text_seq("OPTIONS"): 2689 self._match_l_paren() 2690 k = self._parse_string() 2691 self._match(TokenType.EQ) 2692 v = self._parse_string() 2693 options = [k, v] 2694 self._match_r_paren() 2695 2696 self._match(TokenType.ALIAS) 2697 return self.expression( 2698 exp.Cache, 2699 this=table, 2700 lazy=lazy, 2701 options=options, 2702 expression=self._parse_select(nested=True), 2703 ) 2704 2705 def _parse_partition(self) -> t.Optional[exp.Partition]: 2706 if not self._match(TokenType.PARTITION): 2707 return None 2708 2709 return self.expression( 2710 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2711 ) 2712 2713 def _parse_value(self) -> t.Optional[exp.Tuple]: 2714 if self._match(TokenType.L_PAREN): 2715 expressions = self._parse_csv(self._parse_expression) 2716 self._match_r_paren() 2717 return self.expression(exp.Tuple, expressions=expressions) 2718 2719 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2720 expression = self._parse_expression() 2721 if expression: 2722 return self.expression(exp.Tuple, expressions=[expression]) 2723 return None 2724 2725 def _parse_projections(self) -> t.List[exp.Expression]: 2726 return self._parse_expressions() 2727 2728 def _parse_select( 2729 self, 2730 nested: bool = False, 2731 table: bool = False, 2732 parse_subquery_alias: bool = True, 2733 parse_set_operation: bool = True, 2734 ) -> t.Optional[exp.Expression]: 2735 cte = self._parse_with() 2736 2737 if cte: 2738 this = self._parse_statement() 2739 2740 if not this: 2741 self.raise_error("Failed to parse any statement following CTE") 2742 return cte 2743 2744 if "with" in this.arg_types: 2745 this.set("with", cte) 2746 else: 2747 self.raise_error(f"{this.key} does not support CTE") 2748 this = cte 2749 2750 return this 2751 2752 # duckdb supports leading with FROM x 2753 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2754 2755 if self._match(TokenType.SELECT): 2756 comments = self._prev_comments 2757 2758 hint = self._parse_hint() 2759 2760 if self._next and not self._next.token_type == TokenType.DOT: 2761 all_ = self._match(TokenType.ALL) 2762 distinct = self._match_set(self.DISTINCT_TOKENS) 2763 else: 2764 all_, distinct = None, None 2765 2766 kind = ( 2767 self._match(TokenType.ALIAS) 2768 and self._match_texts(("STRUCT", "VALUE")) 2769 and self._prev.text.upper() 2770 ) 2771 2772 if distinct: 2773 distinct = self.expression( 2774 exp.Distinct, 2775 on=self._parse_value() if self._match(TokenType.ON) else None, 2776 ) 2777 2778 if all_ and distinct: 2779 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2780 2781 limit = self._parse_limit(top=True) 2782 projections = self._parse_projections() 2783 2784 this = self.expression( 2785 exp.Select, 2786 kind=kind, 2787 hint=hint, 2788 distinct=distinct, 2789 
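# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for the leading-FROM support noted in the surrounding
# _parse_select ("duckdb supports leading with FROM x"): a bare FROM is
# rewritten via exp.select("*").from_(...). Assumes a standard sqlglot install.
import sqlglot

ast = sqlglot.parse_one("FROM tbl", read="duckdb")
assert ast.sql() == "SELECT * FROM tbl"
# --- end annotation -----------------------------------------------------------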
expressions=projections, 2790 limit=limit, 2791 ) 2792 this.comments = comments 2793 2794 into = self._parse_into() 2795 if into: 2796 this.set("into", into) 2797 2798 if not from_: 2799 from_ = self._parse_from() 2800 2801 if from_: 2802 this.set("from", from_) 2803 2804 this = self._parse_query_modifiers(this) 2805 elif (table or nested) and self._match(TokenType.L_PAREN): 2806 if self._match(TokenType.PIVOT): 2807 this = self._parse_simplified_pivot() 2808 elif self._match(TokenType.FROM): 2809 this = exp.select("*").from_( 2810 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2811 ) 2812 else: 2813 this = ( 2814 self._parse_table() 2815 if table 2816 else self._parse_select(nested=True, parse_set_operation=False) 2817 ) 2818 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2819 2820 self._match_r_paren() 2821 2822 # We return early here so that the UNION isn't attached to the subquery by the 2823 # following call to _parse_set_operations, but instead becomes the parent node 2824 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2825 elif self._match(TokenType.VALUES, advance=False): 2826 this = self._parse_derived_table_values() 2827 elif from_: 2828 this = exp.select("*").from_(from_.this, copy=False) 2829 elif self._match(TokenType.SUMMARIZE): 2830 table = self._match(TokenType.TABLE) 2831 this = self._parse_select() or self._parse_string() or self._parse_table() 2832 return self.expression(exp.Summarize, this=this, table=table) 2833 else: 2834 this = None 2835 2836 return self._parse_set_operations(this) if parse_set_operation else this 2837 2838 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2839 if not skip_with_token and not self._match(TokenType.WITH): 2840 return None 2841 2842 comments = self._prev_comments 2843 recursive = self._match(TokenType.RECURSIVE) 2844 2845 expressions = [] 2846 while True: 2847 expressions.append(self._parse_cte()) 2848 2849 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2850 break 2851 else: 2852 self._match(TokenType.WITH) 2853 2854 return self.expression( 2855 exp.With, comments=comments, expressions=expressions, recursive=recursive 2856 ) 2857 2858 def _parse_cte(self) -> exp.CTE: 2859 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2860 if not alias or not alias.this: 2861 self.raise_error("Expected CTE to have alias") 2862 2863 self._match(TokenType.ALIAS) 2864 comments = self._prev_comments 2865 2866 if self._match_text_seq("NOT", "MATERIALIZED"): 2867 materialized = False 2868 elif self._match_text_seq("MATERIALIZED"): 2869 materialized = True 2870 else: 2871 materialized = None 2872 2873 return self.expression( 2874 exp.CTE, 2875 this=self._parse_wrapped(self._parse_statement), 2876 alias=alias, 2877 materialized=materialized, 2878 comments=comments, 2879 ) 2880 2881 def _parse_table_alias( 2882 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2883 ) -> t.Optional[exp.TableAlias]: 2884 any_token = self._match(TokenType.ALIAS) 2885 alias = ( 2886 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2887 or self._parse_string_as_identifier() 2888 ) 2889 2890 index = self._index 2891 if self._match(TokenType.L_PAREN): 2892 columns = self._parse_csv(self._parse_function_parameter) 2893 self._match_r_paren() if columns else self._retreat(index) 2894 else: 2895 columns = None 2896 2897 if not alias and not columns: 2898 return None 2899 2900 table_alias = self.expression(exp.TableAlias, 
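# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for _parse_with/_parse_cte above: the WITH clause is
# attached to the statement that follows it. Assumes a standard sqlglot install.
import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
with_ = ast.args.get("with")
assert isinstance(with_, exp.With)
assert with_.expressions[0].alias == "c"  # each entry is an exp.CTE
# --- end annotation -----------------------------------------------------------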
this=alias, columns=columns) 2901 2902 # We bubble up comments from the Identifier to the TableAlias 2903 if isinstance(alias, exp.Identifier): 2904 table_alias.add_comments(alias.pop_comments()) 2905 2906 return table_alias 2907 2908 def _parse_subquery( 2909 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2910 ) -> t.Optional[exp.Subquery]: 2911 if not this: 2912 return None 2913 2914 return self.expression( 2915 exp.Subquery, 2916 this=this, 2917 pivots=self._parse_pivots(), 2918 alias=self._parse_table_alias() if parse_alias else None, 2919 ) 2920 2921 def _implicit_unnests_to_explicit(self, this: E) -> E: 2922 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2923 2924 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2925 for i, join in enumerate(this.args.get("joins") or []): 2926 table = join.this 2927 normalized_table = table.copy() 2928 normalized_table.meta["maybe_column"] = True 2929 normalized_table = _norm(normalized_table, dialect=self.dialect) 2930 2931 if isinstance(table, exp.Table) and not join.args.get("on"): 2932 if normalized_table.parts[0].name in refs: 2933 table_as_column = table.to_column() 2934 unnest = exp.Unnest(expressions=[table_as_column]) 2935 2936 # Table.to_column creates a parent Alias node that we want to convert to 2937 # a TableAlias and attach to the Unnest, so it matches the parser's output 2938 if isinstance(table.args.get("alias"), exp.TableAlias): 2939 table_as_column.replace(table_as_column.this) 2940 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2941 2942 table.replace(unnest) 2943 2944 refs.add(normalized_table.alias_or_name) 2945 2946 return this 2947 2948 def _parse_query_modifiers( 2949 self, this: t.Optional[exp.Expression] 2950 ) -> t.Optional[exp.Expression]: 2951 if isinstance(this, (exp.Query, exp.Table)): 2952 for join in self._parse_joins(): 2953 this.append("joins", join) 2954 for lateral in iter(self._parse_lateral, None): 2955 this.append("laterals", lateral) 2956 2957 while True: 2958 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2959 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2960 key, expression = parser(self) 2961 2962 if expression: 2963 this.set(key, expression) 2964 if key == "limit": 2965 offset = expression.args.pop("offset", None) 2966 2967 if offset: 2968 offset = exp.Offset(expression=offset) 2969 this.set("offset", offset) 2970 2971 limit_by_expressions = expression.expressions 2972 expression.set("expressions", None) 2973 offset.set("expressions", limit_by_expressions) 2974 continue 2975 break 2976 2977 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 2978 this = self._implicit_unnests_to_explicit(this) 2979 2980 return this 2981 2982 def _parse_hint(self) -> t.Optional[exp.Hint]: 2983 if self._match(TokenType.HINT): 2984 hints = [] 2985 for hint in iter( 2986 lambda: self._parse_csv( 2987 lambda: self._parse_function() or self._parse_var(upper=True) 2988 ), 2989 [], 2990 ): 2991 hints.extend(hint) 2992 2993 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2994 self.raise_error("Expected */ after HINT") 2995 2996 return self.expression(exp.Hint, expressions=hints) 2997 2998 return None 2999 3000 def _parse_into(self) -> t.Optional[exp.Into]: 3001 if not self._match(TokenType.INTO): 3002 return None 3003 3004 temp = self._match(TokenType.TEMPORARY) 3005 unlogged = self._match_text_seq("UNLOGGED") 3006 self._match(TokenType.TABLE) 3007 3008 return 
self.expression( 3009 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3010 ) 3011 3012 def _parse_from( 3013 self, joins: bool = False, skip_from_token: bool = False 3014 ) -> t.Optional[exp.From]: 3015 if not skip_from_token and not self._match(TokenType.FROM): 3016 return None 3017 3018 return self.expression( 3019 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3020 ) 3021 3022 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3023 return self.expression( 3024 exp.MatchRecognizeMeasure, 3025 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3026 this=self._parse_expression(), 3027 ) 3028 3029 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3030 if not self._match(TokenType.MATCH_RECOGNIZE): 3031 return None 3032 3033 self._match_l_paren() 3034 3035 partition = self._parse_partition_by() 3036 order = self._parse_order() 3037 3038 measures = ( 3039 self._parse_csv(self._parse_match_recognize_measure) 3040 if self._match_text_seq("MEASURES") 3041 else None 3042 ) 3043 3044 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3045 rows = exp.var("ONE ROW PER MATCH") 3046 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3047 text = "ALL ROWS PER MATCH" 3048 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3049 text += " SHOW EMPTY MATCHES" 3050 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3051 text += " OMIT EMPTY MATCHES" 3052 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3053 text += " WITH UNMATCHED ROWS" 3054 rows = exp.var(text) 3055 else: 3056 rows = None 3057 3058 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3059 text = "AFTER MATCH SKIP" 3060 if self._match_text_seq("PAST", "LAST", "ROW"): 3061 text += " PAST LAST ROW" 3062 elif self._match_text_seq("TO", "NEXT", "ROW"): 3063 text += " TO NEXT ROW" 3064 elif self._match_text_seq("TO", "FIRST"): 3065 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3066 elif self._match_text_seq("TO", "LAST"): 3067 text += f" TO LAST {self._advance_any().text}" # type: ignore 3068 after = exp.var(text) 3069 else: 3070 after = None 3071 3072 if self._match_text_seq("PATTERN"): 3073 self._match_l_paren() 3074 3075 if not self._curr: 3076 self.raise_error("Expecting )", self._curr) 3077 3078 paren = 1 3079 start = self._curr 3080 3081 while self._curr and paren > 0: 3082 if self._curr.token_type == TokenType.L_PAREN: 3083 paren += 1 3084 if self._curr.token_type == TokenType.R_PAREN: 3085 paren -= 1 3086 3087 end = self._prev 3088 self._advance() 3089 3090 if paren > 0: 3091 self.raise_error("Expecting )", self._curr) 3092 3093 pattern = exp.var(self._find_sql(start, end)) 3094 else: 3095 pattern = None 3096 3097 define = ( 3098 self._parse_csv(self._parse_name_as_expression) 3099 if self._match_text_seq("DEFINE") 3100 else None 3101 ) 3102 3103 self._match_r_paren() 3104 3105 return self.expression( 3106 exp.MatchRecognize, 3107 partition_by=partition, 3108 order=order, 3109 measures=measures, 3110 rows=rows, 3111 after=after, 3112 pattern=pattern, 3113 define=define, 3114 alias=self._parse_table_alias(), 3115 ) 3116 3117 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3118 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3119 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3120 cross_apply = False 3121 3122 if cross_apply is not None: 3123 this = self._parse_select(table=True) 3124 view = None 3125 outer = None 3126 
elif self._match(TokenType.LATERAL): 3127 this = self._parse_select(table=True) 3128 view = self._match(TokenType.VIEW) 3129 outer = self._match(TokenType.OUTER) 3130 else: 3131 return None 3132 3133 if not this: 3134 this = ( 3135 self._parse_unnest() 3136 or self._parse_function() 3137 or self._parse_id_var(any_token=False) 3138 ) 3139 3140 while self._match(TokenType.DOT): 3141 this = exp.Dot( 3142 this=this, 3143 expression=self._parse_function() or self._parse_id_var(any_token=False), 3144 ) 3145 3146 if view: 3147 table = self._parse_id_var(any_token=False) 3148 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3149 table_alias: t.Optional[exp.TableAlias] = self.expression( 3150 exp.TableAlias, this=table, columns=columns 3151 ) 3152 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3153 # We move the alias from the lateral's child node to the lateral itself 3154 table_alias = this.args["alias"].pop() 3155 else: 3156 table_alias = self._parse_table_alias() 3157 3158 return self.expression( 3159 exp.Lateral, 3160 this=this, 3161 view=view, 3162 outer=outer, 3163 alias=table_alias, 3164 cross_apply=cross_apply, 3165 ) 3166 3167 def _parse_join_parts( 3168 self, 3169 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3170 return ( 3171 self._match_set(self.JOIN_METHODS) and self._prev, 3172 self._match_set(self.JOIN_SIDES) and self._prev, 3173 self._match_set(self.JOIN_KINDS) and self._prev, 3174 ) 3175 3176 def _parse_join( 3177 self, skip_join_token: bool = False, parse_bracket: bool = False 3178 ) -> t.Optional[exp.Join]: 3179 if self._match(TokenType.COMMA): 3180 return self.expression(exp.Join, this=self._parse_table()) 3181 3182 index = self._index 3183 method, side, kind = self._parse_join_parts() 3184 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3185 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3186 3187 if not skip_join_token and not join: 3188 self._retreat(index) 3189 kind = None 3190 method = None 3191 side = None 3192 3193 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3194 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3195 3196 if not skip_join_token and not join and not outer_apply and not cross_apply: 3197 return None 3198 3199 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3200 3201 if method: 3202 kwargs["method"] = method.text 3203 if side: 3204 kwargs["side"] = side.text 3205 if kind: 3206 kwargs["kind"] = kind.text 3207 if hint: 3208 kwargs["hint"] = hint 3209 3210 if self._match(TokenType.MATCH_CONDITION): 3211 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3212 3213 if self._match(TokenType.ON): 3214 kwargs["on"] = self._parse_assignment() 3215 elif self._match(TokenType.USING): 3216 kwargs["using"] = self._parse_wrapped_id_vars() 3217 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3218 kind and kind.token_type == TokenType.CROSS 3219 ): 3220 index = self._index 3221 joins: t.Optional[list] = list(self._parse_joins()) 3222 3223 if joins and self._match(TokenType.ON): 3224 kwargs["on"] = self._parse_assignment() 3225 elif joins and self._match(TokenType.USING): 3226 kwargs["using"] = self._parse_wrapped_id_vars() 3227 else: 3228 joins = None 3229 self._retreat(index) 3230 3231 kwargs["this"].set("joins", joins if joins else None) 3232 3233 comments = [c for token in (method, side, kind) if token for 
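# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for _parse_join above: the method/side/kind tokens from
# _parse_join_parts become string args on the Join node. Assumes a standard
# sqlglot install.
import sqlglot

ast = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
join = ast.args["joins"][0]
assert join.side == "LEFT"              # from the JOIN_SIDES token
assert join.args.get("on") is not None  # the ON condition
# --- end annotation -----------------------------------------------------------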
c in token.comments] 3234 return self.expression(exp.Join, comments=comments, **kwargs) 3235 3236 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3237 this = self._parse_assignment() 3238 3239 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3240 return this 3241 3242 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3243 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3244 3245 return this 3246 3247 def _parse_index_params(self) -> exp.IndexParameters: 3248 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3249 3250 if self._match(TokenType.L_PAREN, advance=False): 3251 columns = self._parse_wrapped_csv(self._parse_with_operator) 3252 else: 3253 columns = None 3254 3255 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3256 partition_by = self._parse_partition_by() 3257 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3258 tablespace = ( 3259 self._parse_var(any_token=True) 3260 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3261 else None 3262 ) 3263 where = self._parse_where() 3264 3265 on = self._parse_field() if self._match(TokenType.ON) else None 3266 3267 return self.expression( 3268 exp.IndexParameters, 3269 using=using, 3270 columns=columns, 3271 include=include, 3272 partition_by=partition_by, 3273 where=where, 3274 with_storage=with_storage, 3275 tablespace=tablespace, 3276 on=on, 3277 ) 3278 3279 def _parse_index( 3280 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3281 ) -> t.Optional[exp.Index]: 3282 if index or anonymous: 3283 unique = None 3284 primary = None 3285 amp = None 3286 3287 self._match(TokenType.ON) 3288 self._match(TokenType.TABLE) # hive 3289 table = self._parse_table_parts(schema=True) 3290 else: 3291 unique = self._match(TokenType.UNIQUE) 3292 primary = self._match_text_seq("PRIMARY") 3293 amp = self._match_text_seq("AMP") 3294 3295 if not self._match(TokenType.INDEX): 3296 return None 3297 3298 index = self._parse_id_var() 3299 table = None 3300 3301 params = self._parse_index_params() 3302 3303 return self.expression( 3304 exp.Index, 3305 this=index, 3306 table=table, 3307 unique=unique, 3308 primary=primary, 3309 amp=amp, 3310 params=params, 3311 ) 3312 3313 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3314 hints: t.List[exp.Expression] = [] 3315 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3316 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3317 hints.append( 3318 self.expression( 3319 exp.WithTableHint, 3320 expressions=self._parse_csv( 3321 lambda: self._parse_function() or self._parse_var(any_token=True) 3322 ), 3323 ) 3324 ) 3325 self._match_r_paren() 3326 else: 3327 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3328 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3329 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3330 3331 self._match_set((TokenType.INDEX, TokenType.KEY)) 3332 if self._match(TokenType.FOR): 3333 hint.set("target", self._advance_any() and self._prev.text.upper()) 3334 3335 hint.set("expressions", self._parse_wrapped_id_vars()) 3336 hints.append(hint) 3337 3338 return hints or None 3339 3340 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3341 return ( 3342 (not schema and self._parse_function(optional_parens=False)) 3343 or self._parse_id_var(any_token=False) 3344 or 
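# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for _parse_table_hints above: T-SQL's WITH (...) hints
# become WithTableHint nodes on the table. Assumes a standard sqlglot install.
import sqlglot
from sqlglot import exp

tbl = sqlglot.parse_one("SELECT * FROM t WITH (NOLOCK)", read="tsql").find(exp.Table)
hints = tbl.args.get("hints")
assert hints and isinstance(hints[0], exp.WithTableHint)
# --- end annotation -----------------------------------------------------------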
self._parse_string_as_identifier() 3345 or self._parse_placeholder() 3346 ) 3347 3348 def _parse_table_parts( 3349 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3350 ) -> exp.Table: 3351 catalog = None 3352 db = None 3353 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3354 3355 while self._match(TokenType.DOT): 3356 if catalog: 3357 # This allows nesting the table in arbitrarily many dot expressions if needed 3358 table = self.expression( 3359 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3360 ) 3361 else: 3362 catalog = db 3363 db = table 3364 # "" used for tsql FROM a..b case 3365 table = self._parse_table_part(schema=schema) or "" 3366 3367 if ( 3368 wildcard 3369 and self._is_connected() 3370 and (isinstance(table, exp.Identifier) or not table) 3371 and self._match(TokenType.STAR) 3372 ): 3373 if isinstance(table, exp.Identifier): 3374 table.args["this"] += "*" 3375 else: 3376 table = exp.Identifier(this="*") 3377 3378 # We bubble up comments from the Identifier to the Table 3379 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3380 3381 if is_db_reference: 3382 catalog = db 3383 db = table 3384 table = None 3385 3386 if not table and not is_db_reference: 3387 self.raise_error(f"Expected table name but got {self._curr}") 3388 if not db and is_db_reference: 3389 self.raise_error(f"Expected database name but got {self._curr}") 3390 3391 table = self.expression( 3392 exp.Table, 3393 comments=comments, 3394 this=table, 3395 db=db, 3396 catalog=catalog, 3397 ) 3398 3399 changes = self._parse_changes() 3400 if changes: 3401 table.set("changes", changes) 3402 3403 at_before = self._parse_historical_data() 3404 if at_before: 3405 table.set("when", at_before) 3406 3407 pivots = self._parse_pivots() 3408 if pivots: 3409 table.set("pivots", pivots) 3410 3411 return table 3412 3413 def _parse_table( 3414 self, 3415 schema: bool = False, 3416 joins: bool = False, 3417 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3418 parse_bracket: bool = False, 3419 is_db_reference: bool = False, 3420 parse_partition: bool = False, 3421 ) -> t.Optional[exp.Expression]: 3422 lateral = self._parse_lateral() 3423 if lateral: 3424 return lateral 3425 3426 unnest = self._parse_unnest() 3427 if unnest: 3428 return unnest 3429 3430 values = self._parse_derived_table_values() 3431 if values: 3432 return values 3433 3434 subquery = self._parse_select(table=True) 3435 if subquery: 3436 if not subquery.args.get("pivots"): 3437 subquery.set("pivots", self._parse_pivots()) 3438 return subquery 3439 3440 bracket = parse_bracket and self._parse_bracket(None) 3441 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3442 3443 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3444 self._parse_table 3445 ) 3446 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3447 3448 only = self._match(TokenType.ONLY) 3449 3450 this = t.cast( 3451 exp.Expression, 3452 bracket 3453 or rows_from 3454 or self._parse_bracket( 3455 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3456 ), 3457 ) 3458 3459 if only: 3460 this.set("only", only) 3461 3462 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3463 self._match_text_seq("*") 3464 3465 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3466 if parse_partition and self._match(TokenType.PARTITION, 
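# --- editor's annotation (illustrative sketch, not part of parser.py) --------
# A hedged example for _parse_table_parts above: successive DOT tokens shift
# the parts into the catalog/db/table slots. Assumes a standard sqlglot install.
import sqlglot
from sqlglot import exp

tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
assert (tbl.catalog, tbl.db, tbl.name) == ("c", "d", "t")
# --- end annotation -----------------------------------------------------------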
advance=False): 3467 this.set("partition", self._parse_partition()) 3468 3469 if schema: 3470 return self._parse_schema(this=this) 3471 3472 version = self._parse_version() 3473 3474 if version: 3475 this.set("version", version) 3476 3477 if self.dialect.ALIAS_POST_TABLESAMPLE: 3478 table_sample = self._parse_table_sample() 3479 3480 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3481 if alias: 3482 this.set("alias", alias) 3483 3484 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3485 return self.expression( 3486 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3487 ) 3488 3489 this.set("hints", self._parse_table_hints()) 3490 3491 if not this.args.get("pivots"): 3492 this.set("pivots", self._parse_pivots()) 3493 3494 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3495 table_sample = self._parse_table_sample() 3496 3497 if table_sample: 3498 table_sample.set("this", this) 3499 this = table_sample 3500 3501 if joins: 3502 for join in self._parse_joins(): 3503 this.append("joins", join) 3504 3505 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3506 this.set("ordinality", True) 3507 this.set("alias", self._parse_table_alias()) 3508 3509 return this 3510 3511 def _parse_version(self) -> t.Optional[exp.Version]: 3512 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3513 this = "TIMESTAMP" 3514 elif self._match(TokenType.VERSION_SNAPSHOT): 3515 this = "VERSION" 3516 else: 3517 return None 3518 3519 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3520 kind = self._prev.text.upper() 3521 start = self._parse_bitwise() 3522 self._match_texts(("TO", "AND")) 3523 end = self._parse_bitwise() 3524 expression: t.Optional[exp.Expression] = self.expression( 3525 exp.Tuple, expressions=[start, end] 3526 ) 3527 elif self._match_text_seq("CONTAINED", "IN"): 3528 kind = "CONTAINED IN" 3529 expression = self.expression( 3530 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3531 ) 3532 elif self._match(TokenType.ALL): 3533 kind = "ALL" 3534 expression = None 3535 else: 3536 self._match_text_seq("AS", "OF") 3537 kind = "AS OF" 3538 expression = self._parse_type() 3539 3540 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3541 3542 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3543 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3544 index = self._index 3545 historical_data = None 3546 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3547 this = self._prev.text.upper() 3548 kind = ( 3549 self._match(TokenType.L_PAREN) 3550 and self._match_texts(self.HISTORICAL_DATA_KIND) 3551 and self._prev.text.upper() 3552 ) 3553 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3554 3555 if expression: 3556 self._match_r_paren() 3557 historical_data = self.expression( 3558 exp.HistoricalData, this=this, kind=kind, expression=expression 3559 ) 3560 else: 3561 self._retreat(index) 3562 3563 return historical_data 3564 3565 def _parse_changes(self) -> t.Optional[exp.Changes]: 3566 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3567 return None 3568 3569 information = self._parse_var(any_token=True) 3570 self._match_r_paren() 3571 3572 return self.expression( 3573 exp.Changes, 3574 information=information, 3575 at_before=self._parse_historical_data(), 3576 end=self._parse_historical_data(), 3577 ) 3578 3579 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3580 if not 
self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None
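    # Illustrative sketch, not part of the parser itself: a rough example of how
    # the table-factor helpers above surface through the public API. It assumes
    # only `sqlglot.parse_one` and the `exp` nodes already used in this module.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     table = sqlglot.parse_one("SELECT * FROM c.db.tbl").find(exp.Table)
    #     assert (table.catalog, table.db, table.name) == ("c", "db", "tbl")
    #
    #     unnest = sqlglot.parse_one("SELECT * FROM UNNEST([1, 2]) AS t(x)").find(exp.Unnest)
    #     assert unnest.alias == "t"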
    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_assignment()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) ->
t.List[str]: 3799 return [agg.alias for agg in aggregations] 3800 3801 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3802 if not skip_where_token and not self._match(TokenType.PREWHERE): 3803 return None 3804 3805 return self.expression( 3806 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3807 ) 3808 3809 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3810 if not skip_where_token and not self._match(TokenType.WHERE): 3811 return None 3812 3813 return self.expression( 3814 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3815 ) 3816 3817 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3818 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3819 return None 3820 3821 elements: t.Dict[str, t.Any] = defaultdict(list) 3822 3823 if self._match(TokenType.ALL): 3824 elements["all"] = True 3825 elif self._match(TokenType.DISTINCT): 3826 elements["all"] = False 3827 3828 while True: 3829 expressions = self._parse_csv( 3830 lambda: None 3831 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3832 else self._parse_assignment() 3833 ) 3834 if expressions: 3835 elements["expressions"].extend(expressions) 3836 3837 grouping_sets = self._parse_grouping_sets() 3838 if grouping_sets: 3839 elements["grouping_sets"].extend(grouping_sets) 3840 3841 rollup = None 3842 cube = None 3843 totals = None 3844 3845 index = self._index 3846 with_ = self._match(TokenType.WITH) 3847 if self._match(TokenType.ROLLUP): 3848 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3849 elements["rollup"].extend(ensure_list(rollup)) 3850 3851 if self._match(TokenType.CUBE): 3852 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3853 elements["cube"].extend(ensure_list(cube)) 3854 3855 if self._match_text_seq("TOTALS"): 3856 totals = True 3857 elements["totals"] = True # type: ignore 3858 3859 if not (grouping_sets or rollup or cube or totals): 3860 if with_: 3861 self._retreat(index) 3862 break 3863 3864 return self.expression(exp.Group, **elements) # type: ignore 3865 3866 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3867 if not self._match(TokenType.GROUPING_SETS): 3868 return None 3869 3870 return self._parse_wrapped_csv(self._parse_grouping_set) 3871 3872 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3873 if self._match(TokenType.L_PAREN): 3874 grouping_set = self._parse_csv(self._parse_column) 3875 self._match_r_paren() 3876 return self.expression(exp.Tuple, expressions=grouping_set) 3877 3878 return self._parse_column() 3879 3880 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3881 if not skip_having_token and not self._match(TokenType.HAVING): 3882 return None 3883 return self.expression(exp.Having, this=self._parse_assignment()) 3884 3885 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3886 if not self._match(TokenType.QUALIFY): 3887 return None 3888 return self.expression(exp.Qualify, this=self._parse_assignment()) 3889 3890 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3891 if skip_start_token: 3892 start = None 3893 elif self._match(TokenType.START_WITH): 3894 start = self._parse_assignment() 3895 else: 3896 return None 3897 3898 self._match(TokenType.CONNECT_BY) 3899 nocycle = self._match_text_seq("NOCYCLE") 3900 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3901 
exp.Prior, this=self._parse_bitwise() 3902 ) 3903 connect = self._parse_assignment() 3904 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3905 3906 if not start and self._match(TokenType.START_WITH): 3907 start = self._parse_assignment() 3908 3909 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3910 3911 def _parse_name_as_expression(self) -> exp.Alias: 3912 return self.expression( 3913 exp.Alias, 3914 alias=self._parse_id_var(any_token=True), 3915 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3916 ) 3917 3918 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3919 if self._match_text_seq("INTERPOLATE"): 3920 return self._parse_wrapped_csv(self._parse_name_as_expression) 3921 return None 3922 3923 def _parse_order( 3924 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3925 ) -> t.Optional[exp.Expression]: 3926 siblings = None 3927 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3928 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3929 return this 3930 3931 siblings = True 3932 3933 return self.expression( 3934 exp.Order, 3935 this=this, 3936 expressions=self._parse_csv(self._parse_ordered), 3937 interpolate=self._parse_interpolate(), 3938 siblings=siblings, 3939 ) 3940 3941 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3942 if not self._match(token): 3943 return None 3944 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3945 3946 def _parse_ordered( 3947 self, parse_method: t.Optional[t.Callable] = None 3948 ) -> t.Optional[exp.Ordered]: 3949 this = parse_method() if parse_method else self._parse_assignment() 3950 if not this: 3951 return None 3952 3953 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 3954 this = exp.var("ALL") 3955 3956 asc = self._match(TokenType.ASC) 3957 desc = self._match(TokenType.DESC) or (asc and False) 3958 3959 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3960 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3961 3962 nulls_first = is_nulls_first or False 3963 explicitly_null_ordered = is_nulls_first or is_nulls_last 3964 3965 if ( 3966 not explicitly_null_ordered 3967 and ( 3968 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3969 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3970 ) 3971 and self.dialect.NULL_ORDERING != "nulls_are_last" 3972 ): 3973 nulls_first = True 3974 3975 if self._match_text_seq("WITH", "FILL"): 3976 with_fill = self.expression( 3977 exp.WithFill, 3978 **{ # type: ignore 3979 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3980 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3981 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3982 }, 3983 ) 3984 else: 3985 with_fill = None 3986 3987 return self.expression( 3988 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3989 ) 3990 3991 def _parse_limit( 3992 self, 3993 this: t.Optional[exp.Expression] = None, 3994 top: bool = False, 3995 skip_limit_token: bool = False, 3996 ) -> t.Optional[exp.Expression]: 3997 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3998 comments = self._prev_comments 3999 if top: 4000 limit_paren = self._match(TokenType.L_PAREN) 4001 expression = self._parse_term() if limit_paren else self._parse_number() 4002 4003 if limit_paren: 4004 self._match_r_paren() 4005 else: 4006 expression = self._parse_term() 4007 4008 if 
self._match(TokenType.COMMA): 4009 offset = expression 4010 expression = self._parse_term() 4011 else: 4012 offset = None 4013 4014 limit_exp = self.expression( 4015 exp.Limit, 4016 this=this, 4017 expression=expression, 4018 offset=offset, 4019 comments=comments, 4020 expressions=self._parse_limit_by(), 4021 ) 4022 4023 return limit_exp 4024 4025 if self._match(TokenType.FETCH): 4026 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4027 direction = self._prev.text.upper() if direction else "FIRST" 4028 4029 count = self._parse_field(tokens=self.FETCH_TOKENS) 4030 percent = self._match(TokenType.PERCENT) 4031 4032 self._match_set((TokenType.ROW, TokenType.ROWS)) 4033 4034 only = self._match_text_seq("ONLY") 4035 with_ties = self._match_text_seq("WITH", "TIES") 4036 4037 if only and with_ties: 4038 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4039 4040 return self.expression( 4041 exp.Fetch, 4042 direction=direction, 4043 count=count, 4044 percent=percent, 4045 with_ties=with_ties, 4046 ) 4047 4048 return this 4049 4050 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4051 if not self._match(TokenType.OFFSET): 4052 return this 4053 4054 count = self._parse_term() 4055 self._match_set((TokenType.ROW, TokenType.ROWS)) 4056 4057 return self.expression( 4058 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4059 ) 4060 4061 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4062 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4063 4064 def _parse_locks(self) -> t.List[exp.Lock]: 4065 locks = [] 4066 while True: 4067 if self._match_text_seq("FOR", "UPDATE"): 4068 update = True 4069 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4070 "LOCK", "IN", "SHARE", "MODE" 4071 ): 4072 update = False 4073 else: 4074 break 4075 4076 expressions = None 4077 if self._match_text_seq("OF"): 4078 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4079 4080 wait: t.Optional[bool | exp.Expression] = None 4081 if self._match_text_seq("NOWAIT"): 4082 wait = True 4083 elif self._match_text_seq("WAIT"): 4084 wait = self._parse_primary() 4085 elif self._match_text_seq("SKIP", "LOCKED"): 4086 wait = False 4087 4088 locks.append( 4089 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4090 ) 4091 4092 return locks 4093 4094 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4095 while this and self._match_set(self.SET_OPERATIONS): 4096 token_type = self._prev.token_type 4097 4098 if token_type == TokenType.UNION: 4099 operation: t.Type[exp.SetOperation] = exp.Union 4100 elif token_type == TokenType.EXCEPT: 4101 operation = exp.Except 4102 else: 4103 operation = exp.Intersect 4104 4105 comments = self._prev.comments 4106 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4107 by_name = self._match_text_seq("BY", "NAME") 4108 expression = self._parse_select(nested=True, parse_set_operation=False) 4109 4110 this = self.expression( 4111 operation, 4112 comments=comments, 4113 this=this, 4114 distinct=distinct, 4115 by_name=by_name, 4116 expression=expression, 4117 ) 4118 4119 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4120 expression = this.expression 4121 4122 if expression: 4123 for arg in self.SET_OP_MODIFIERS: 4124 expr = expression.args.get(arg) 4125 if expr: 4126 this.set(arg, expr.pop()) 4127 
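        # Illustrative sketch, not part of the parser itself: on dialects where
        # MODIFIERS_ATTACHED_TO_SET_OP is enabled, the block above hoists a
        # trailing ORDER BY / LIMIT / OFFSET off the right-hand SELECT and onto
        # the set operation node, e.g.
        #
        #     import sqlglot
        #     union = sqlglot.parse_one("SELECT a FROM x UNION SELECT a FROM y ORDER BY a")
        #     assert isinstance(union, sqlglot.exp.Union)
        #     assert union.args.get("order") is not None  # moved off the second SELECT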
4128 return this 4129 4130 def _parse_expression(self) -> t.Optional[exp.Expression]: 4131 return self._parse_alias(self._parse_assignment()) 4132 4133 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4134 this = self._parse_disjunction() 4135 4136 while self._match_set(self.ASSIGNMENT): 4137 this = self.expression( 4138 self.ASSIGNMENT[self._prev.token_type], 4139 this=this, 4140 comments=self._prev_comments, 4141 expression=self._parse_assignment(), 4142 ) 4143 4144 return this 4145 4146 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4147 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4148 4149 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4150 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4151 4152 def _parse_equality(self) -> t.Optional[exp.Expression]: 4153 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4154 4155 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4156 return self._parse_tokens(self._parse_range, self.COMPARISON) 4157 4158 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4159 this = this or self._parse_bitwise() 4160 negate = self._match(TokenType.NOT) 4161 4162 if self._match_set(self.RANGE_PARSERS): 4163 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4164 if not expression: 4165 return this 4166 4167 this = expression 4168 elif self._match(TokenType.ISNULL): 4169 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4170 4171 # Postgres supports ISNULL and NOTNULL for conditions. 4172 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4173 if self._match(TokenType.NOTNULL): 4174 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4175 this = self.expression(exp.Not, this=this) 4176 4177 if negate: 4178 this = self.expression(exp.Not, this=this) 4179 4180 if self._match(TokenType.IS): 4181 this = self._parse_is(this) 4182 4183 return this 4184 4185 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4186 index = self._index - 1 4187 negate = self._match(TokenType.NOT) 4188 4189 if self._match_text_seq("DISTINCT", "FROM"): 4190 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4191 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4192 4193 expression = self._parse_null() or self._parse_boolean() 4194 if not expression: 4195 self._retreat(index) 4196 return None 4197 4198 this = self.expression(exp.Is, this=this, expression=expression) 4199 return self.expression(exp.Not, this=this) if negate else this 4200 4201 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4202 unnest = self._parse_unnest(with_alias=False) 4203 if unnest: 4204 this = self.expression(exp.In, this=this, unnest=unnest) 4205 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4206 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4207 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4208 4209 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4210 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4211 else: 4212 this = self.expression(exp.In, this=this, expressions=expressions) 4213 4214 if matched_l_paren: 4215 self._match_r_paren(this) 4216 elif not self._match(TokenType.R_BRACKET, expression=this): 4217 self.raise_error("Expecting ]") 4218 else: 4219 this = self.expression(exp.In, this=this, 
field=self._parse_field()) 4220 4221 return this 4222 4223 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4224 low = self._parse_bitwise() 4225 self._match(TokenType.AND) 4226 high = self._parse_bitwise() 4227 return self.expression(exp.Between, this=this, low=low, high=high) 4228 4229 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4230 if not self._match(TokenType.ESCAPE): 4231 return this 4232 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4233 4234 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4235 index = self._index 4236 4237 if not self._match(TokenType.INTERVAL) and match_interval: 4238 return None 4239 4240 if self._match(TokenType.STRING, advance=False): 4241 this = self._parse_primary() 4242 else: 4243 this = self._parse_term() 4244 4245 if not this or ( 4246 isinstance(this, exp.Column) 4247 and not this.table 4248 and not this.this.quoted 4249 and this.name.upper() == "IS" 4250 ): 4251 self._retreat(index) 4252 return None 4253 4254 unit = self._parse_function() or ( 4255 not self._match(TokenType.ALIAS, advance=False) 4256 and self._parse_var(any_token=True, upper=True) 4257 ) 4258 4259 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4260 # each INTERVAL expression into this canonical form so it's easy to transpile 4261 if this and this.is_number: 4262 this = exp.Literal.string(this.to_py()) 4263 elif this and this.is_string: 4264 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4265 if len(parts) == 1: 4266 if unit: 4267 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4268 self._retreat(self._index - 1) 4269 4270 this = exp.Literal.string(parts[0][0]) 4271 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4272 4273 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4274 unit = self.expression( 4275 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4276 ) 4277 4278 interval = self.expression(exp.Interval, this=this, unit=unit) 4279 4280 index = self._index 4281 self._match(TokenType.PLUS) 4282 4283 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4284 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4285 return self.expression( 4286 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4287 ) 4288 4289 self._retreat(index) 4290 return interval 4291 4292 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4293 this = self._parse_term() 4294 4295 while True: 4296 if self._match_set(self.BITWISE): 4297 this = self.expression( 4298 self.BITWISE[self._prev.token_type], 4299 this=this, 4300 expression=self._parse_term(), 4301 ) 4302 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4303 this = self.expression( 4304 exp.DPipe, 4305 this=this, 4306 expression=self._parse_term(), 4307 safe=not self.dialect.STRICT_STRING_CONCAT, 4308 ) 4309 elif self._match(TokenType.DQMARK): 4310 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4311 elif self._match_pair(TokenType.LT, TokenType.LT): 4312 this = self.expression( 4313 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4314 ) 4315 elif self._match_pair(TokenType.GT, TokenType.GT): 4316 this = self.expression( 4317 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4318 ) 4319 else: 4320 break 4321 4322 return this 4323 4324 def _parse_term(self) -> t.Optional[exp.Expression]: 4325 return self._parse_tokens(self._parse_factor, self.TERM) 4326 4327 def _parse_factor(self) -> t.Optional[exp.Expression]: 4328 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4329 this = parse_method() 4330 4331 while self._match_set(self.FACTOR): 4332 klass = self.FACTOR[self._prev.token_type] 4333 comments = self._prev_comments 4334 expression = parse_method() 4335 4336 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4337 self._retreat(self._index - 1) 4338 return this 4339 4340 this = self.expression(klass, this=this, comments=comments, expression=expression) 4341 4342 if isinstance(this, exp.Div): 4343 this.args["typed"] = self.dialect.TYPED_DIVISION 4344 this.args["safe"] = self.dialect.SAFE_DIVISION 4345 4346 return this 4347 4348 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4349 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4350 4351 def _parse_unary(self) -> t.Optional[exp.Expression]: 4352 if self._match_set(self.UNARY_PARSERS): 4353 return self.UNARY_PARSERS[self._prev.token_type](self) 4354 return self._parse_at_time_zone(self._parse_type()) 4355 4356 def _parse_type( 4357 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4358 ) -> t.Optional[exp.Expression]: 4359 interval = parse_interval and self._parse_interval() 4360 if interval: 4361 return interval 4362 4363 index = self._index 4364 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4365 4366 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if
type_token == TokenType.OBJECT_IDENTIFIER: 4460 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4461 4462 # https://materialize.com/docs/sql/types/map/ 4463 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4464 key_type = self._parse_types( 4465 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4466 ) 4467 if not self._match(TokenType.FARROW): 4468 self._retreat(index) 4469 return None 4470 4471 value_type = self._parse_types( 4472 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4473 ) 4474 if not self._match(TokenType.R_BRACKET): 4475 self._retreat(index) 4476 return None 4477 4478 return exp.DataType( 4479 this=exp.DataType.Type.MAP, 4480 expressions=[key_type, value_type], 4481 nested=True, 4482 prefix=prefix, 4483 ) 4484 4485 nested = type_token in self.NESTED_TYPE_TOKENS 4486 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4487 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4488 expressions = None 4489 maybe_func = False 4490 4491 if self._match(TokenType.L_PAREN): 4492 if is_struct: 4493 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4494 elif nested: 4495 expressions = self._parse_csv( 4496 lambda: self._parse_types( 4497 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4498 ) 4499 ) 4500 elif type_token in self.ENUM_TYPE_TOKENS: 4501 expressions = self._parse_csv(self._parse_equality) 4502 elif is_aggregate: 4503 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4504 any_token=False, tokens=(TokenType.VAR,) 4505 ) 4506 if not func_or_ident or not self._match(TokenType.COMMA): 4507 return None 4508 expressions = self._parse_csv( 4509 lambda: self._parse_types( 4510 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4511 ) 4512 ) 4513 expressions.insert(0, func_or_ident) 4514 else: 4515 expressions = self._parse_csv(self._parse_type_size) 4516 4517 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4518 if type_token == TokenType.VECTOR and len(expressions) == 2: 4519 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4520 4521 if not expressions or not self._match(TokenType.R_PAREN): 4522 self._retreat(index) 4523 return None 4524 4525 maybe_func = True 4526 4527 values: t.Optional[t.List[exp.Expression]] = None 4528 4529 if nested and self._match(TokenType.LT): 4530 if is_struct: 4531 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4532 else: 4533 expressions = self._parse_csv( 4534 lambda: self._parse_types( 4535 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4536 ) 4537 ) 4538 4539 if not self._match(TokenType.GT): 4540 self.raise_error("Expecting >") 4541 4542 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4543 values = self._parse_csv(self._parse_assignment) 4544 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4545 4546 if type_token in self.TIMESTAMPS: 4547 if self._match_text_seq("WITH", "TIME", "ZONE"): 4548 maybe_func = False 4549 tz_type = ( 4550 exp.DataType.Type.TIMETZ 4551 if type_token in self.TIMES 4552 else exp.DataType.Type.TIMESTAMPTZ 4553 ) 4554 this = exp.DataType(this=tz_type, expressions=expressions) 4555 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4556 maybe_func = False 4557 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4558 elif self._match_text_seq("WITHOUT", 
"TIME", "ZONE"): 4559 maybe_func = False 4560 elif type_token == TokenType.INTERVAL: 4561 unit = self._parse_var(upper=True) 4562 if unit: 4563 if self._match_text_seq("TO"): 4564 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4565 4566 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4567 else: 4568 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4569 4570 if maybe_func and check_func: 4571 index2 = self._index 4572 peek = self._parse_string() 4573 4574 if not peek: 4575 self._retreat(index) 4576 return None 4577 4578 self._retreat(index2) 4579 4580 if not this: 4581 if self._match_text_seq("UNSIGNED"): 4582 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4583 if not unsigned_type_token: 4584 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4585 4586 type_token = unsigned_type_token or type_token 4587 4588 this = exp.DataType( 4589 this=exp.DataType.Type[type_token.value], 4590 expressions=expressions, 4591 nested=nested, 4592 prefix=prefix, 4593 ) 4594 4595 # Empty arrays/structs are allowed 4596 if values is not None: 4597 cls = exp.Struct if is_struct else exp.Array 4598 this = exp.cast(cls(expressions=values), this, copy=False) 4599 4600 elif expressions: 4601 this.set("expressions", expressions) 4602 4603 # https://materialize.com/docs/sql/types/list/#type-name 4604 while self._match(TokenType.LIST): 4605 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4606 4607 index = self._index 4608 4609 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4610 matched_array = self._match(TokenType.ARRAY) 4611 4612 while self._curr: 4613 matched_l_bracket = self._match(TokenType.L_BRACKET) 4614 if not matched_l_bracket and not matched_array: 4615 break 4616 4617 matched_array = False 4618 values = self._parse_csv(self._parse_assignment) or None 4619 if ( 4620 values 4621 and not schema 4622 and this.is_type(exp.DataType.Type.ARRAY, exp.DataType.Type.MAP) 4623 ): 4624 self._retreat(index) 4625 break 4626 4627 this = exp.DataType( 4628 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4629 ) 4630 self._match(TokenType.R_BRACKET) 4631 4632 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4633 converter = self.TYPE_CONVERTERS.get(this.this) 4634 if converter: 4635 this = converter(t.cast(exp.DataType, this)) 4636 4637 return this 4638 4639 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4640 index = self._index 4641 4642 if ( 4643 self._curr 4644 and self._next 4645 and self._curr.token_type in self.TYPE_TOKENS 4646 and self._next.token_type in self.TYPE_TOKENS 4647 ): 4648 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4649 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4650 this = self._parse_id_var() 4651 else: 4652 this = ( 4653 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4654 or self._parse_id_var() 4655 ) 4656 4657 self._match(TokenType.COLON) 4658 4659 if ( 4660 type_required 4661 and not isinstance(this, exp.DataType) 4662 and not self._match_set(self.TYPE_TOKENS, advance=False) 4663 ): 4664 self._retreat(index) 4665 return self._parse_types() 4666 4667 return self._parse_column_def(this) 4668 4669 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4670 if not self._match_text_seq("AT", "TIME", "ZONE"): 4671 return this 4672 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4673 4674 def _parse_column(self) -> t.Optional[exp.Expression]: 4675 this = self._parse_column_reference() 4676 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4677 4678 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4679 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4680 4681 return column 4682 4683 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4684 this = self._parse_field() 4685 if ( 4686 not this 4687 and self._match(TokenType.VALUES, advance=False) 4688 and self.VALUES_FOLLOWED_BY_PAREN 4689 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4690 ): 4691 this = self._parse_id_var() 4692 4693 if isinstance(this, exp.Identifier): 4694 # We bubble up comments from the Identifier to the Column 4695 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4696 4697 return this 4698 4699 def _parse_colon_as_variant_extract( 4700 self, this: t.Optional[exp.Expression] 4701 ) -> t.Optional[exp.Expression]: 4702 casts = [] 4703 json_path = [] 4704 4705 while self._match(TokenType.COLON): 4706 start_index = self._index 4707 4708 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4709 path = self._parse_column_ops( 4710 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4711 ) 4712 4713 # The cast :: operator has a lower precedence than the extraction operator :, so 4714 # we rearrange the AST appropriately to avoid casting the JSON path 4715 while isinstance(path, exp.Cast): 4716 casts.append(path.to) 4717 path = path.this 4718 4719 if casts: 4720 dcolon_offset = next( 4721 i 4722 for i, t in enumerate(self._tokens[start_index:]) 4723 if t.token_type == TokenType.DCOLON 4724 ) 4725 end_token = self._tokens[start_index + dcolon_offset - 1] 4726 else: 4727 end_token = self._prev 4728 4729 if path: 4730 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4731 4732 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4733 # Databricks transforms it back to the colon/dot notation 4734 if json_path: 4735 this = self.expression( 4736 exp.JSONExtract, 4737 this=this, 4738 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4739 variant_extract=True, 4740 ) 4741 4742 while casts: 4743 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4744 4745 return this 4746 4747 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4748 return self._parse_types() 4749 4750 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4751 this = self._parse_bracket(this) 4752 4753 while 
self._match_set(self.COLUMN_OPERATORS): 4754 op_token = self._prev.token_type 4755 op = self.COLUMN_OPERATORS.get(op_token) 4756 4757 if op_token == TokenType.DCOLON: 4758 field = self._parse_dcolon() 4759 if not field: 4760 self.raise_error("Expected type") 4761 elif op and self._curr: 4762 field = self._parse_column_reference() 4763 else: 4764 field = self._parse_field(any_token=True, anonymous_func=True) 4765 4766 if isinstance(field, exp.Func) and this: 4767 # bigquery allows function calls like x.y.count(...) 4768 # SAFE.SUBSTR(...) 4769 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4770 this = exp.replace_tree( 4771 this, 4772 lambda n: ( 4773 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4774 if n.table 4775 else n.this 4776 ) 4777 if isinstance(n, exp.Column) 4778 else n, 4779 ) 4780 4781 if op: 4782 this = op(self, this, field) 4783 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4784 this = self.expression( 4785 exp.Column, 4786 this=field, 4787 table=this.this, 4788 db=this.args.get("table"), 4789 catalog=this.args.get("db"), 4790 ) 4791 else: 4792 this = self.expression(exp.Dot, this=this, expression=field) 4793 4794 this = self._parse_bracket(this) 4795 4796 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4797 4798 def _parse_primary(self) -> t.Optional[exp.Expression]: 4799 if self._match_set(self.PRIMARY_PARSERS): 4800 token_type = self._prev.token_type 4801 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4802 4803 if token_type == TokenType.STRING: 4804 expressions = [primary] 4805 while self._match(TokenType.STRING): 4806 expressions.append(exp.Literal.string(self._prev.text)) 4807 4808 if len(expressions) > 1: 4809 return self.expression(exp.Concat, expressions=expressions) 4810 4811 return primary 4812 4813 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4814 return exp.Literal.number(f"0.{self._prev.text}") 4815 4816 if self._match(TokenType.L_PAREN): 4817 comments = self._prev_comments 4818 query = self._parse_select() 4819 4820 if query: 4821 expressions = [query] 4822 else: 4823 expressions = self._parse_expressions() 4824 4825 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4826 4827 if not this and self._match(TokenType.R_PAREN, advance=False): 4828 this = self.expression(exp.Tuple) 4829 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4830 this = self._parse_subquery(this=this, parse_alias=False) 4831 elif isinstance(this, exp.Subquery): 4832 this = self._parse_subquery( 4833 this=self._parse_set_operations(this), parse_alias=False 4834 ) 4835 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4836 this = self.expression(exp.Tuple, expressions=expressions) 4837 else: 4838 this = self.expression(exp.Paren, this=this) 4839 4840 if this: 4841 this.add_comments(comments) 4842 4843 self._match_r_paren(expression=this) 4844 return this 4845 4846 return None 4847 4848 def _parse_field( 4849 self, 4850 any_token: bool = False, 4851 tokens: t.Optional[t.Collection[TokenType]] = None, 4852 anonymous_func: bool = False, 4853 ) -> t.Optional[exp.Expression]: 4854 if anonymous_func: 4855 field = ( 4856 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4857 or self._parse_primary() 4858 ) 4859 else: 4860 field = self._parse_primary() or self._parse_function( 4861 anonymous=anonymous_func, any_token=any_token 4862 ) 4863 return field or 
self._parse_id_var(any_token=any_token, tokens=tokens) 4864 4865 def _parse_function( 4866 self, 4867 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4868 anonymous: bool = False, 4869 optional_parens: bool = True, 4870 any_token: bool = False, 4871 ) -> t.Optional[exp.Expression]: 4872 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4873 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4874 fn_syntax = False 4875 if ( 4876 self._match(TokenType.L_BRACE, advance=False) 4877 and self._next 4878 and self._next.text.upper() == "FN" 4879 ): 4880 self._advance(2) 4881 fn_syntax = True 4882 4883 func = self._parse_function_call( 4884 functions=functions, 4885 anonymous=anonymous, 4886 optional_parens=optional_parens, 4887 any_token=any_token, 4888 ) 4889 4890 if fn_syntax: 4891 self._match(TokenType.R_BRACE) 4892 4893 return func 4894 4895 def _parse_function_call( 4896 self, 4897 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4898 anonymous: bool = False, 4899 optional_parens: bool = True, 4900 any_token: bool = False, 4901 ) -> t.Optional[exp.Expression]: 4902 if not self._curr: 4903 return None 4904 4905 comments = self._curr.comments 4906 token_type = self._curr.token_type 4907 this = self._curr.text 4908 upper = this.upper() 4909 4910 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4911 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4912 self._advance() 4913 return self._parse_window(parser(self)) 4914 4915 if not self._next or self._next.token_type != TokenType.L_PAREN: 4916 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4917 self._advance() 4918 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4919 4920 return None 4921 4922 if any_token: 4923 if token_type in self.RESERVED_TOKENS: 4924 return None 4925 elif token_type not in self.FUNC_TOKENS: 4926 return None 4927 4928 self._advance(2) 4929 4930 parser = self.FUNCTION_PARSERS.get(upper) 4931 if parser and not anonymous: 4932 this = parser(self) 4933 else: 4934 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4935 4936 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4937 this = self.expression(subquery_predicate, this=self._parse_select()) 4938 self._match_r_paren() 4939 return this 4940 4941 if functions is None: 4942 functions = self.FUNCTIONS 4943 4944 function = functions.get(upper) 4945 4946 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4947 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4948 4949 if alias: 4950 args = self._kv_to_prop_eq(args) 4951 4952 if function and not anonymous: 4953 if "dialect" in function.__code__.co_varnames: 4954 func = function(args, dialect=self.dialect) 4955 else: 4956 func = function(args) 4957 4958 func = self.validate_expression(func, args) 4959 if not self.dialect.NORMALIZE_FUNCTIONS: 4960 func.meta["name"] = this 4961 4962 this = func 4963 else: 4964 if token_type == TokenType.IDENTIFIER: 4965 this = exp.Identifier(this=this, quoted=True) 4966 this = self.expression(exp.Anonymous, this=this, expressions=args) 4967 4968 if isinstance(this, exp.Expression): 4969 this.add_comments(comments) 4970 4971 self._match_r_paren(this) 4972 return self._parse_window(this) 4973 4974 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4975 transformed = [] 4976 4977 for e in expressions: 4978 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4979 if isinstance(e, 
exp.Alias): 4980 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4981 4982 if not isinstance(e, exp.PropertyEQ): 4983 e = self.expression( 4984 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4985 ) 4986 4987 if isinstance(e.this, exp.Column): 4988 e.this.replace(e.this.this) 4989 4990 transformed.append(e) 4991 4992 return transformed 4993 4994 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4995 return self._parse_column_def(self._parse_id_var()) 4996 4997 def _parse_user_defined_function( 4998 self, kind: t.Optional[TokenType] = None 4999 ) -> t.Optional[exp.Expression]: 5000 this = self._parse_id_var() 5001 5002 while self._match(TokenType.DOT): 5003 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5004 5005 if not self._match(TokenType.L_PAREN): 5006 return this 5007 5008 expressions = self._parse_csv(self._parse_function_parameter) 5009 self._match_r_paren() 5010 return self.expression( 5011 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5012 ) 5013 5014 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5015 literal = self._parse_primary() 5016 if literal: 5017 return self.expression(exp.Introducer, this=token.text, expression=literal) 5018 5019 return self.expression(exp.Identifier, this=token.text) 5020 5021 def _parse_session_parameter(self) -> exp.SessionParameter: 5022 kind = None 5023 this = self._parse_id_var() or self._parse_primary() 5024 5025 if this and self._match(TokenType.DOT): 5026 kind = this.name 5027 this = self._parse_var() or self._parse_primary() 5028 5029 return self.expression(exp.SessionParameter, this=this, kind=kind) 5030 5031 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5032 return self._parse_id_var() 5033 5034 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5035 index = self._index 5036 5037 if self._match(TokenType.L_PAREN): 5038 expressions = t.cast( 5039 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5040 ) 5041 5042 if not self._match(TokenType.R_PAREN): 5043 self._retreat(index) 5044 else: 5045 expressions = [self._parse_lambda_arg()] 5046 5047 if self._match_set(self.LAMBDAS): 5048 return self.LAMBDAS[self._prev.token_type](self, expressions) 5049 5050 self._retreat(index) 5051 5052 this: t.Optional[exp.Expression] 5053 5054 if self._match(TokenType.DISTINCT): 5055 this = self.expression( 5056 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5057 ) 5058 else: 5059 this = self._parse_select_or_expression(alias=alias) 5060 5061 return self._parse_limit( 5062 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5063 ) 5064 5065 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5066 index = self._index 5067 if not self._match(TokenType.L_PAREN): 5068 return this 5069 5070 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5071 # expr can be of both types 5072 if self._match_set(self.SELECT_START_TOKENS): 5073 self._retreat(index) 5074 return this 5075 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5076 self._match_r_paren() 5077 return self.expression(exp.Schema, this=this, expressions=args) 5078 5079 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5080 return self._parse_column_def(self._parse_field(any_token=True)) 5081 5082 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5083 # column defs are not really columns, they're identifiers 5084 if isinstance(this, exp.Column): 5085 this = this.this 5086 5087 kind = self._parse_types(schema=True) 5088 5089 if self._match_text_seq("FOR", "ORDINALITY"): 5090 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5091 5092 constraints: t.List[exp.Expression] = [] 5093 5094 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5095 ("ALIAS", "MATERIALIZED") 5096 ): 5097 persisted = self._prev.text.upper() == "MATERIALIZED" 5098 constraints.append( 5099 self.expression( 5100 exp.ComputedColumnConstraint, 5101 this=self._parse_assignment(), 5102 persisted=persisted or self._match_text_seq("PERSISTED"), 5103 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5104 ) 5105 ) 5106 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5107 self._match(TokenType.ALIAS) 5108 constraints.append( 5109 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5110 ) 5111 5112 while True: 5113 constraint = self._parse_column_constraint() 5114 if not constraint: 5115 break 5116 constraints.append(constraint) 5117 5118 if not kind and not constraints: 5119 return this 5120 5121 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5122 5123 def _parse_auto_increment( 5124 self, 5125 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5126 start = None 5127 increment = None 5128 5129 if self._match(TokenType.L_PAREN, advance=False): 5130 args = self._parse_wrapped_csv(self._parse_bitwise) 5131 start = seq_get(args, 0) 5132 increment = seq_get(args, 1) 5133 elif self._match_text_seq("START"): 5134 start = self._parse_bitwise() 5135 self._match_text_seq("INCREMENT") 5136 increment = self._parse_bitwise() 5137 5138 if start and increment: 5139 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5140 5141 return exp.AutoIncrementColumnConstraint() 5142 5143 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5144 if not self._match_text_seq("REFRESH"): 5145 self._retreat(self._index - 1) 5146 return None 5147 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5148 5149 def _parse_compress(self) -> exp.CompressColumnConstraint: 5150 if self._match(TokenType.L_PAREN, advance=False): 5151 return self.expression( 5152 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5153 ) 5154 5155 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5156 5157 def _parse_generated_as_identity( 5158 self, 5159 ) -> ( 5160 exp.GeneratedAsIdentityColumnConstraint 5161 | exp.ComputedColumnConstraint 5162 | exp.GeneratedAsRowColumnConstraint 5163 ): 5164 if self._match_text_seq("BY", "DEFAULT"): 5165 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5166 this = self.expression( 5167 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5168 ) 5169 else: 5170 self._match_text_seq("ALWAYS") 5171 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5172 5173 self._match(TokenType.ALIAS) 5174 5175 if self._match_text_seq("ROW"): 5176 start = self._match_text_seq("START") 5177 if not start: 5178 self._match(TokenType.END) 5179 hidden = self._match_text_seq("HIDDEN") 5180 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5181 5182 identity = self._match_text_seq("IDENTITY") 5183 5184 if self._match(TokenType.L_PAREN): 5185 if self._match(TokenType.START_WITH): 5186 this.set("start", self._parse_bitwise()) 5187 if self._match_text_seq("INCREMENT", "BY"): 5188 this.set("increment", self._parse_bitwise()) 5189 if self._match_text_seq("MINVALUE"): 5190 this.set("minvalue", self._parse_bitwise()) 5191 if self._match_text_seq("MAXVALUE"): 5192 this.set("maxvalue", self._parse_bitwise()) 5193 5194 if self._match_text_seq("CYCLE"): 5195 this.set("cycle", True) 5196 elif self._match_text_seq("NO", "CYCLE"): 5197 this.set("cycle", False) 5198 5199 if not identity: 5200 this.set("expression", self._parse_range()) 5201 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5202 args = self._parse_csv(self._parse_bitwise) 5203 this.set("start", seq_get(args, 0)) 5204 this.set("increment", seq_get(args, 1)) 5205 5206 self._match_r_paren() 5207 5208 return this 5209 5210 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5211 self._match_text_seq("LENGTH") 5212 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5213 5214 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5215 if self._match_text_seq("NULL"): 5216 return self.expression(exp.NotNullColumnConstraint) 5217 if self._match_text_seq("CASESPECIFIC"): 5218 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5219 if self._match_text_seq("FOR", "REPLICATION"): 5220 return self.expression(exp.NotForReplicationColumnConstraint) 5221 return None 5222 5223 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5224 if self._match(TokenType.CONSTRAINT): 5225 this = self._parse_id_var() 5226 else: 5227 this = None 5228 5229 if self._match_texts(self.CONSTRAINT_PARSERS): 5230 return self.expression( 5231 exp.ColumnConstraint, 5232 this=this, 5233 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5234 ) 5235 5236 return this 5237 5238 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5239 if not self._match(TokenType.CONSTRAINT): 5240 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5241 5242 return self.expression( 5243 exp.Constraint, 5244 this=self._parse_id_var(), 5245 expressions=self._parse_unnamed_constraints(), 5246 ) 5247 5248 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5249 constraints = [] 5250 while True: 5251 constraint = self._parse_unnamed_constraint() or self._parse_function() 5252 if not constraint: 5253 break 5254 constraints.append(constraint) 5255 5256 return constraints 5257 5258 def _parse_unnamed_constraint( 5259 self, constraints: t.Optional[t.Collection[str]] = None 5260 ) -> t.Optional[exp.Expression]: 5261 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5262 constraints or self.CONSTRAINT_PARSERS 5263 ): 5264 return None 5265 5266 constraint = self._prev.text.upper() 5267 if constraint not in self.CONSTRAINT_PARSERS: 5268 
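        # Illustrative note, not part of the parser itself: this error fires when
        # a word was accepted from the caller-supplied `constraints` collection
        # but has no entry in CONSTRAINT_PARSERS. A hypothetical dialect subclass
        # that extends SCHEMA_UNNAMED_CONSTRAINTS without registering a matching
        # parser would hit it, e.g.
        #
        #     class MyParser(Parser):
        #         SCHEMA_UNNAMED_CONSTRAINTS = {*Parser.SCHEMA_UNNAMED_CONSTRAINTS, "SHARDED"}
        #         # without a CONSTRAINT_PARSERS["SHARDED"] entry, parsing
        #         # "CREATE TABLE t (a INT, SHARDED (a))" raises
        #         # "No parser found for schema constraint SHARDED."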
self.raise_error(f"No parser found for schema constraint {constraint}.") 5269 5270 return self.CONSTRAINT_PARSERS[constraint](self) 5271 5272 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5273 return self._parse_id_var(any_token=False) 5274 5275 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5276 self._match_text_seq("KEY") 5277 return self.expression( 5278 exp.UniqueColumnConstraint, 5279 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5280 this=self._parse_schema(self._parse_unique_key()), 5281 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5282 on_conflict=self._parse_on_conflict(), 5283 ) 5284 5285 def _parse_key_constraint_options(self) -> t.List[str]: 5286 options = [] 5287 while True: 5288 if not self._curr: 5289 break 5290 5291 if self._match(TokenType.ON): 5292 action = None 5293 on = self._advance_any() and self._prev.text 5294 5295 if self._match_text_seq("NO", "ACTION"): 5296 action = "NO ACTION" 5297 elif self._match_text_seq("CASCADE"): 5298 action = "CASCADE" 5299 elif self._match_text_seq("RESTRICT"): 5300 action = "RESTRICT" 5301 elif self._match_pair(TokenType.SET, TokenType.NULL): 5302 action = "SET NULL" 5303 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5304 action = "SET DEFAULT" 5305 else: 5306 self.raise_error("Invalid key constraint") 5307 5308 options.append(f"ON {on} {action}") 5309 else: 5310 var = self._parse_var_from_options( 5311 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5312 ) 5313 if not var: 5314 break 5315 options.append(var.name) 5316 5317 return options 5318 5319 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5320 if match and not self._match(TokenType.REFERENCES): 5321 return None 5322 5323 expressions = None 5324 this = self._parse_table(schema=True) 5325 options = self._parse_key_constraint_options() 5326 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5327 5328 def _parse_foreign_key(self) -> exp.ForeignKey: 5329 expressions = self._parse_wrapped_id_vars() 5330 reference = self._parse_references() 5331 options = {} 5332 5333 while self._match(TokenType.ON): 5334 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5335 self.raise_error("Expected DELETE or UPDATE") 5336 5337 kind = self._prev.text.lower() 5338 5339 if self._match_text_seq("NO", "ACTION"): 5340 action = "NO ACTION" 5341 elif self._match(TokenType.SET): 5342 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5343 action = "SET " + self._prev.text.upper() 5344 else: 5345 self._advance() 5346 action = self._prev.text.upper() 5347 5348 options[kind] = action 5349 5350 return self.expression( 5351 exp.ForeignKey, 5352 expressions=expressions, 5353 reference=reference, 5354 **options, # type: ignore 5355 ) 5356 5357 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5358 return self._parse_field() 5359 5360 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5361 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5362 self._retreat(self._index - 1) 5363 return None 5364 5365 id_vars = self._parse_wrapped_id_vars() 5366 return self.expression( 5367 exp.PeriodForSystemTimeConstraint, 5368 this=seq_get(id_vars, 0), 5369 expression=seq_get(id_vars, 1), 5370 ) 5371 5372 def _parse_primary_key( 5373 self, wrapped_optional: bool = False, in_props: bool = False 5374 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5375 desc = ( 5376 self._match_set((TokenType.ASC, 
TokenType.DESC)) 5377 and self._prev.token_type == TokenType.DESC 5378 ) 5379 5380 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5381 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5382 5383 expressions = self._parse_wrapped_csv( 5384 self._parse_primary_key_part, optional=wrapped_optional 5385 ) 5386 options = self._parse_key_constraint_options() 5387 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5388 5389 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5390 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5391 5392 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5393 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5394 return this 5395 5396 bracket_kind = self._prev.token_type 5397 expressions = self._parse_csv( 5398 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5399 ) 5400 5401 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5402 self.raise_error("Expected ]") 5403 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5404 self.raise_error("Expected }") 5405 5406 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5407 if bracket_kind == TokenType.L_BRACE: 5408 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5409 elif not this: 5410 this = self.expression(exp.Array, expressions=expressions) 5411 else: 5412 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5413 if constructor_type: 5414 return self.expression(constructor_type, expressions=expressions) 5415 5416 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5417 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5418 5419 self._add_comments(this) 5420 return self._parse_bracket(this) 5421 5422 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5423 if self._match(TokenType.COLON): 5424 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5425 return this 5426 5427 def _parse_case(self) -> t.Optional[exp.Expression]: 5428 ifs = [] 5429 default = None 5430 5431 comments = self._prev_comments 5432 expression = self._parse_assignment() 5433 5434 while self._match(TokenType.WHEN): 5435 this = self._parse_assignment() 5436 self._match(TokenType.THEN) 5437 then = self._parse_assignment() 5438 ifs.append(self.expression(exp.If, this=this, true=then)) 5439 5440 if self._match(TokenType.ELSE): 5441 default = self._parse_assignment() 5442 5443 if not self._match(TokenType.END): 5444 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5445 default = exp.column("interval") 5446 else: 5447 self.raise_error("Expected END after CASE", self._prev) 5448 5449 return self.expression( 5450 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5451 ) 5452 5453 def _parse_if(self) -> t.Optional[exp.Expression]: 5454 if self._match(TokenType.L_PAREN): 5455 args = self._parse_csv(self._parse_assignment) 5456 this = self.validate_expression(exp.If.from_arg_list(args), args) 5457 self._match_r_paren() 5458 else: 5459 index = self._index - 1 5460 5461 if self.NO_PAREN_IF_COMMANDS and index == 0: 5462 return self._parse_as_command(self._prev) 5463 5464 condition = self._parse_assignment() 5465 5466 if not condition: 5467 
self._retreat(index) 5468 return None 5469 5470 self._match(TokenType.THEN) 5471 true = self._parse_assignment() 5472 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5473 self._match(TokenType.END) 5474 this = self.expression(exp.If, this=condition, true=true, false=false) 5475 5476 return this 5477 5478 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5479 if not self._match_text_seq("VALUE", "FOR"): 5480 self._retreat(self._index - 1) 5481 return None 5482 5483 return self.expression( 5484 exp.NextValueFor, 5485 this=self._parse_column(), 5486 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5487 ) 5488 5489 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5490 this = self._parse_function() or self._parse_var_or_string(upper=True) 5491 5492 if self._match(TokenType.FROM): 5493 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5494 5495 if not self._match(TokenType.COMMA): 5496 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5497 5498 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5499 5500 def _parse_gap_fill(self) -> exp.GapFill: 5501 self._match(TokenType.TABLE) 5502 this = self._parse_table() 5503 5504 self._match(TokenType.COMMA) 5505 args = [this, *self._parse_csv(self._parse_lambda)] 5506 5507 gap_fill = exp.GapFill.from_arg_list(args) 5508 return self.validate_expression(gap_fill, args) 5509 5510 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5511 this = self._parse_assignment() 5512 5513 if not self._match(TokenType.ALIAS): 5514 if self._match(TokenType.COMMA): 5515 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5516 5517 self.raise_error("Expected AS after CAST") 5518 5519 fmt = None 5520 to = self._parse_types() 5521 5522 if self._match(TokenType.FORMAT): 5523 fmt_string = self._parse_string() 5524 fmt = self._parse_at_time_zone(fmt_string) 5525 5526 if not to: 5527 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5528 if to.this in exp.DataType.TEMPORAL_TYPES: 5529 this = self.expression( 5530 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5531 this=this, 5532 format=exp.Literal.string( 5533 format_time( 5534 fmt_string.this if fmt_string else "", 5535 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5536 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5537 ) 5538 ), 5539 safe=safe, 5540 ) 5541 5542 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5543 this.set("zone", fmt.args["zone"]) 5544 return this 5545 elif not to: 5546 self.raise_error("Expected TYPE after CAST") 5547 elif isinstance(to, exp.Identifier): 5548 to = exp.DataType.build(to.name, udt=True) 5549 elif to.this == exp.DataType.Type.CHAR: 5550 if self._match(TokenType.CHARACTER_SET): 5551 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5552 5553 return self.expression( 5554 exp.Cast if strict else exp.TryCast, 5555 this=this, 5556 to=to, 5557 format=fmt, 5558 safe=safe, 5559 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5560 ) 5561 5562 def _parse_string_agg(self) -> exp.Expression: 5563 if self._match(TokenType.DISTINCT): 5564 args: t.List[t.Optional[exp.Expression]] = [ 5565 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5566 ] 5567 if self._match(TokenType.COMMA): 5568 args.extend(self._parse_csv(self._parse_assignment)) 5569 else: 5570 args 
= self._parse_csv(self._parse_assignment) # type: ignore 5571 5572 index = self._index 5573 if not self._match(TokenType.R_PAREN) and args: 5574 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5575 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5576 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5577 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5578 5579 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5580 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5581 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5582 if not self._match_text_seq("WITHIN", "GROUP"): 5583 self._retreat(index) 5584 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5585 5586 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5587 order = self._parse_order(this=seq_get(args, 0)) 5588 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5589 5590 def _parse_convert( 5591 self, strict: bool, safe: t.Optional[bool] = None 5592 ) -> t.Optional[exp.Expression]: 5593 this = self._parse_bitwise() 5594 5595 if self._match(TokenType.USING): 5596 to: t.Optional[exp.Expression] = self.expression( 5597 exp.CharacterSet, this=self._parse_var() 5598 ) 5599 elif self._match(TokenType.COMMA): 5600 to = self._parse_types() 5601 else: 5602 to = None 5603 5604 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5605 5606 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5607 """ 5608 There are generally two variants of the DECODE function: 5609 5610 - DECODE(bin, charset) 5611 - DECODE(expression, search, result [, search, result] ... [, default]) 5612 5613 The second variant will always be parsed into a CASE expression. Note that NULL 5614 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5615 instead of relying on pattern matching. 
5616 """ 5617 args = self._parse_csv(self._parse_assignment) 5618 5619 if len(args) < 3: 5620 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5621 5622 expression, *expressions = args 5623 if not expression: 5624 return None 5625 5626 ifs = [] 5627 for search, result in zip(expressions[::2], expressions[1::2]): 5628 if not search or not result: 5629 return None 5630 5631 if isinstance(search, exp.Literal): 5632 ifs.append( 5633 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5634 ) 5635 elif isinstance(search, exp.Null): 5636 ifs.append( 5637 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5638 ) 5639 else: 5640 cond = exp.or_( 5641 exp.EQ(this=expression.copy(), expression=search), 5642 exp.and_( 5643 exp.Is(this=expression.copy(), expression=exp.Null()), 5644 exp.Is(this=search.copy(), expression=exp.Null()), 5645 copy=False, 5646 ), 5647 copy=False, 5648 ) 5649 ifs.append(exp.If(this=cond, true=result)) 5650 5651 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5652 5653 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5654 self._match_text_seq("KEY") 5655 key = self._parse_column() 5656 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5657 self._match_text_seq("VALUE") 5658 value = self._parse_bitwise() 5659 5660 if not key and not value: 5661 return None 5662 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5663 5664 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5665 if not this or not self._match_text_seq("FORMAT", "JSON"): 5666 return this 5667 5668 return self.expression(exp.FormatJson, this=this) 5669 5670 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5671 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5672 for value in values: 5673 if self._match_text_seq(value, "ON", on): 5674 return f"{value} ON {on}" 5675 5676 return None 5677 5678 @t.overload 5679 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5680 5681 @t.overload 5682 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5683 5684 def _parse_json_object(self, agg=False): 5685 star = self._parse_star() 5686 expressions = ( 5687 [star] 5688 if star 5689 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5690 ) 5691 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5692 5693 unique_keys = None 5694 if self._match_text_seq("WITH", "UNIQUE"): 5695 unique_keys = True 5696 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5697 unique_keys = False 5698 5699 self._match_text_seq("KEYS") 5700 5701 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5702 self._parse_type() 5703 ) 5704 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5705 5706 return self.expression( 5707 exp.JSONObjectAgg if agg else exp.JSONObject, 5708 expressions=expressions, 5709 null_handling=null_handling, 5710 unique_keys=unique_keys, 5711 return_type=return_type, 5712 encoding=encoding, 5713 ) 5714 5715 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5716 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5717 if not self._match_text_seq("NESTED"): 5718 this = self._parse_id_var() 5719 kind = self._parse_types(allow_identifiers=False) 5720 nested = None 5721 else: 5722 this = None 5723 kind = None 5724 nested = True 5725 5726 path = self._match_text_seq("PATH") and self._parse_string() 5727 nested_schema = nested and self._parse_json_schema() 5728 5729 return self.expression( 5730 exp.JSONColumnDef, 5731 this=this, 5732 kind=kind, 5733 path=path, 5734 nested_schema=nested_schema, 5735 ) 5736 5737 def _parse_json_schema(self) -> exp.JSONSchema: 5738 self._match_text_seq("COLUMNS") 5739 return self.expression( 5740 exp.JSONSchema, 5741 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5742 ) 5743 5744 def _parse_json_table(self) -> exp.JSONTable: 5745 this = self._parse_format_json(self._parse_bitwise()) 5746 path = self._match(TokenType.COMMA) and self._parse_string() 5747 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5748 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5749 schema = self._parse_json_schema() 5750 5751 return exp.JSONTable( 5752 this=this, 5753 schema=schema, 5754 path=path, 5755 error_handling=error_handling, 5756 empty_handling=empty_handling, 5757 ) 5758 5759 def _parse_match_against(self) -> exp.MatchAgainst: 5760 expressions = self._parse_csv(self._parse_column) 5761 5762 self._match_text_seq(")", "AGAINST", "(") 5763 5764 this = self._parse_string() 5765 5766 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5767 modifier = "IN NATURAL LANGUAGE MODE" 5768 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5769 modifier = f"{modifier} WITH QUERY EXPANSION" 5770 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5771 modifier = "IN BOOLEAN MODE" 5772 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5773 modifier = "WITH QUERY EXPANSION" 5774 else: 5775 modifier = None 5776 5777 return self.expression( 5778 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5779 ) 5780 5781 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5782 def _parse_open_json(self) -> exp.OpenJSON: 5783 this = self._parse_bitwise() 5784 path = self._match(TokenType.COMMA) and self._parse_string() 5785 5786 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5787 this = self._parse_field(any_token=True) 5788 kind = self._parse_types() 5789 path = 
self._parse_string() 5790 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5791 5792 return self.expression( 5793 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5794 ) 5795 5796 expressions = None 5797 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5798 self._match_l_paren() 5799 expressions = self._parse_csv(_parse_open_json_column_def) 5800 5801 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5802 5803 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5804 args = self._parse_csv(self._parse_bitwise) 5805 5806 if self._match(TokenType.IN): 5807 return self.expression( 5808 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5809 ) 5810 5811 if haystack_first: 5812 haystack = seq_get(args, 0) 5813 needle = seq_get(args, 1) 5814 else: 5815 needle = seq_get(args, 0) 5816 haystack = seq_get(args, 1) 5817 5818 return self.expression( 5819 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5820 ) 5821 5822 def _parse_predict(self) -> exp.Predict: 5823 self._match_text_seq("MODEL") 5824 this = self._parse_table() 5825 5826 self._match(TokenType.COMMA) 5827 self._match_text_seq("TABLE") 5828 5829 return self.expression( 5830 exp.Predict, 5831 this=this, 5832 expression=self._parse_table(), 5833 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5834 ) 5835 5836 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5837 args = self._parse_csv(self._parse_table) 5838 return exp.JoinHint(this=func_name.upper(), expressions=args) 5839 5840 def _parse_substring(self) -> exp.Substring: 5841 # Postgres supports the form: substring(string [from int] [for int]) 5842 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5843 5844 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5845 5846 if self._match(TokenType.FROM): 5847 args.append(self._parse_bitwise()) 5848 if self._match(TokenType.FOR): 5849 if len(args) == 1: 5850 args.append(exp.Literal.number(1)) 5851 args.append(self._parse_bitwise()) 5852 5853 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5854 5855 def _parse_trim(self) -> exp.Trim: 5856 # https://www.w3resource.com/sql/character-functions/trim.php 5857 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5858 5859 position = None 5860 collation = None 5861 expression = None 5862 5863 if self._match_texts(self.TRIM_TYPES): 5864 position = self._prev.text.upper() 5865 5866 this = self._parse_bitwise() 5867 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5868 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5869 expression = self._parse_bitwise() 5870 5871 if invert_order: 5872 this, expression = expression, this 5873 5874 if self._match(TokenType.COLLATE): 5875 collation = self._parse_bitwise() 5876 5877 return self.expression( 5878 exp.Trim, this=this, position=position, expression=expression, collation=collation 5879 ) 5880 5881 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5882 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5883 5884 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5885 return self._parse_window(self._parse_id_var(), alias=True) 5886 5887 def _parse_respect_or_ignore_nulls( 5888 self, this: t.Optional[exp.Expression] 5889 ) -> t.Optional[exp.Expression]: 5890 if self._match_text_seq("IGNORE", "NULLS"): 
5891 return self.expression(exp.IgnoreNulls, this=this) 5892 if self._match_text_seq("RESPECT", "NULLS"): 5893 return self.expression(exp.RespectNulls, this=this) 5894 return this 5895 5896 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5897 if self._match(TokenType.HAVING): 5898 self._match_texts(("MAX", "MIN")) 5899 max = self._prev.text.upper() != "MIN" 5900 return self.expression( 5901 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5902 ) 5903 5904 return this 5905 5906 def _parse_window( 5907 self, this: t.Optional[exp.Expression], alias: bool = False 5908 ) -> t.Optional[exp.Expression]: 5909 func = this 5910 comments = func.comments if isinstance(func, exp.Expression) else None 5911 5912 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5913 self._match(TokenType.WHERE) 5914 this = self.expression( 5915 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5916 ) 5917 self._match_r_paren() 5918 5919 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5920 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5921 if self._match_text_seq("WITHIN", "GROUP"): 5922 order = self._parse_wrapped(self._parse_order) 5923 this = self.expression(exp.WithinGroup, this=this, expression=order) 5924 5925 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5926 # Some dialects choose to implement and some do not. 5927 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5928 5929 # There is some code above in _parse_lambda that handles 5930 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5931 5932 # The below changes handle 5933 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5934 5935 # Oracle allows both formats 5936 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5937 # and Snowflake chose to do the same for familiarity 5938 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5939 if isinstance(this, exp.AggFunc): 5940 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5941 5942 if ignore_respect and ignore_respect is not this: 5943 ignore_respect.replace(ignore_respect.this) 5944 this = self.expression(ignore_respect.__class__, this=this) 5945 5946 this = self._parse_respect_or_ignore_nulls(this) 5947 5948 # bigquery select from window x AS (partition by ...) 
5949 if alias: 5950 over = None 5951 self._match(TokenType.ALIAS) 5952 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5953 return this 5954 else: 5955 over = self._prev.text.upper() 5956 5957 if comments and isinstance(func, exp.Expression): 5958 func.pop_comments() 5959 5960 if not self._match(TokenType.L_PAREN): 5961 return self.expression( 5962 exp.Window, 5963 comments=comments, 5964 this=this, 5965 alias=self._parse_id_var(False), 5966 over=over, 5967 ) 5968 5969 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5970 5971 first = self._match(TokenType.FIRST) 5972 if self._match_text_seq("LAST"): 5973 first = False 5974 5975 partition, order = self._parse_partition_and_order() 5976 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5977 5978 if kind: 5979 self._match(TokenType.BETWEEN) 5980 start = self._parse_window_spec() 5981 self._match(TokenType.AND) 5982 end = self._parse_window_spec() 5983 5984 spec = self.expression( 5985 exp.WindowSpec, 5986 kind=kind, 5987 start=start["value"], 5988 start_side=start["side"], 5989 end=end["value"], 5990 end_side=end["side"], 5991 ) 5992 else: 5993 spec = None 5994 5995 self._match_r_paren() 5996 5997 window = self.expression( 5998 exp.Window, 5999 comments=comments, 6000 this=this, 6001 partition_by=partition, 6002 order=order, 6003 spec=spec, 6004 alias=window_alias, 6005 over=over, 6006 first=first, 6007 ) 6008 6009 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6010 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6011 return self._parse_window(window, alias=alias) 6012 6013 return window 6014 6015 def _parse_partition_and_order( 6016 self, 6017 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6018 return self._parse_partition_by(), self._parse_order() 6019 6020 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6021 self._match(TokenType.BETWEEN) 6022 6023 return { 6024 "value": ( 6025 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6026 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6027 or self._parse_bitwise() 6028 ), 6029 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6030 } 6031 6032 def _parse_alias( 6033 self, this: t.Optional[exp.Expression], explicit: bool = False 6034 ) -> t.Optional[exp.Expression]: 6035 any_token = self._match(TokenType.ALIAS) 6036 comments = self._prev_comments or [] 6037 6038 if explicit and not any_token: 6039 return this 6040 6041 if self._match(TokenType.L_PAREN): 6042 aliases = self.expression( 6043 exp.Aliases, 6044 comments=comments, 6045 this=this, 6046 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6047 ) 6048 self._match_r_paren(aliases) 6049 return aliases 6050 6051 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6052 self.STRING_ALIASES and self._parse_string_as_identifier() 6053 ) 6054 6055 if alias: 6056 comments.extend(alias.pop_comments()) 6057 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6058 column = this.this 6059 6060 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6061 if not this.comments and column and column.comments: 6062 this.comments = column.pop_comments() 6063 6064 return this 6065 6066 def _parse_id_var( 6067 self, 6068 any_token: bool = True, 6069 tokens: t.Optional[t.Collection[TokenType]] = None, 6070 ) -> t.Optional[exp.Expression]: 6071 expression = self._parse_identifier() 6072 if 
not expression and ( 6073 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6074 ): 6075 quoted = self._prev.token_type == TokenType.STRING 6076 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6077 6078 return expression 6079 6080 def _parse_string(self) -> t.Optional[exp.Expression]: 6081 if self._match_set(self.STRING_PARSERS): 6082 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6083 return self._parse_placeholder() 6084 6085 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6086 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6087 6088 def _parse_number(self) -> t.Optional[exp.Expression]: 6089 if self._match_set(self.NUMERIC_PARSERS): 6090 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6091 return self._parse_placeholder() 6092 6093 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6094 if self._match(TokenType.IDENTIFIER): 6095 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6096 return self._parse_placeholder() 6097 6098 def _parse_var( 6099 self, 6100 any_token: bool = False, 6101 tokens: t.Optional[t.Collection[TokenType]] = None, 6102 upper: bool = False, 6103 ) -> t.Optional[exp.Expression]: 6104 if ( 6105 (any_token and self._advance_any()) 6106 or self._match(TokenType.VAR) 6107 or (self._match_set(tokens) if tokens else False) 6108 ): 6109 return self.expression( 6110 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6111 ) 6112 return self._parse_placeholder() 6113 6114 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6115 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6116 self._advance() 6117 return self._prev 6118 return None 6119 6120 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6121 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6122 6123 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6124 return self._parse_primary() or self._parse_var(any_token=True) 6125 6126 def _parse_null(self) -> t.Optional[exp.Expression]: 6127 if self._match_set(self.NULL_TOKENS): 6128 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6129 return self._parse_placeholder() 6130 6131 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6132 if self._match(TokenType.TRUE): 6133 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6134 if self._match(TokenType.FALSE): 6135 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6136 return self._parse_placeholder() 6137 6138 def _parse_star(self) -> t.Optional[exp.Expression]: 6139 if self._match(TokenType.STAR): 6140 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6141 return self._parse_placeholder() 6142 6143 def _parse_parameter(self) -> exp.Parameter: 6144 this = self._parse_identifier() or self._parse_primary_or_var() 6145 return self.expression(exp.Parameter, this=this) 6146 6147 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6148 if self._match_set(self.PLACEHOLDER_PARSERS): 6149 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6150 if placeholder: 6151 return placeholder 6152 self._advance(-1) 6153 return None 6154 6155 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6156 if not self._match_texts(keywords): 6157 return None 6158 if self._match(TokenType.L_PAREN, 
advance=False): 6159 return self._parse_wrapped_csv(self._parse_expression) 6160 6161 expression = self._parse_expression() 6162 return [expression] if expression else None 6163 6164 def _parse_csv( 6165 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6166 ) -> t.List[exp.Expression]: 6167 parse_result = parse_method() 6168 items = [parse_result] if parse_result is not None else [] 6169 6170 while self._match(sep): 6171 self._add_comments(parse_result) 6172 parse_result = parse_method() 6173 if parse_result is not None: 6174 items.append(parse_result) 6175 6176 return items 6177 6178 def _parse_tokens( 6179 self, parse_method: t.Callable, expressions: t.Dict 6180 ) -> t.Optional[exp.Expression]: 6181 this = parse_method() 6182 6183 while self._match_set(expressions): 6184 this = self.expression( 6185 expressions[self._prev.token_type], 6186 this=this, 6187 comments=self._prev_comments, 6188 expression=parse_method(), 6189 ) 6190 6191 return this 6192 6193 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6194 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6195 6196 def _parse_wrapped_csv( 6197 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6198 ) -> t.List[exp.Expression]: 6199 return self._parse_wrapped( 6200 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6201 ) 6202 6203 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6204 wrapped = self._match(TokenType.L_PAREN) 6205 if not wrapped and not optional: 6206 self.raise_error("Expecting (") 6207 parse_result = parse_method() 6208 if wrapped: 6209 self._match_r_paren() 6210 return parse_result 6211 6212 def _parse_expressions(self) -> t.List[exp.Expression]: 6213 return self._parse_csv(self._parse_expression) 6214 6215 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6216 return self._parse_select() or self._parse_set_operations( 6217 self._parse_expression() if alias else self._parse_assignment() 6218 ) 6219 6220 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6221 return self._parse_query_modifiers( 6222 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6223 ) 6224 6225 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6226 this = None 6227 if self._match_texts(self.TRANSACTION_KIND): 6228 this = self._prev.text 6229 6230 self._match_texts(("TRANSACTION", "WORK")) 6231 6232 modes = [] 6233 while True: 6234 mode = [] 6235 while self._match(TokenType.VAR): 6236 mode.append(self._prev.text) 6237 6238 if mode: 6239 modes.append(" ".join(mode)) 6240 if not self._match(TokenType.COMMA): 6241 break 6242 6243 return self.expression(exp.Transaction, this=this, modes=modes) 6244 6245 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6246 chain = None 6247 savepoint = None 6248 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6249 6250 self._match_texts(("TRANSACTION", "WORK")) 6251 6252 if self._match_text_seq("TO"): 6253 self._match_text_seq("SAVEPOINT") 6254 savepoint = self._parse_id_var() 6255 6256 if self._match(TokenType.AND): 6257 chain = not self._match_text_seq("NO") 6258 self._match_text_seq("CHAIN") 6259 6260 if is_rollback: 6261 return self.expression(exp.Rollback, savepoint=savepoint) 6262 6263 return self.expression(exp.Commit, chain=chain) 6264 6265 def _parse_refresh(self) -> exp.Refresh: 6266 self._match(TokenType.TABLE) 6267 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6268 6269 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6270 if not self._match_text_seq("ADD"): 6271 return None 6272 6273 self._match(TokenType.COLUMN) 6274 exists_column = self._parse_exists(not_=True) 6275 expression = self._parse_field_def() 6276 6277 if expression: 6278 expression.set("exists", exists_column) 6279 6280 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6281 if self._match_texts(("FIRST", "AFTER")): 6282 position = self._prev.text 6283 column_position = self.expression( 6284 exp.ColumnPosition, this=self._parse_column(), position=position 6285 ) 6286 expression.set("position", column_position) 6287 6288 return expression 6289 6290 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6291 drop = self._match(TokenType.DROP) and self._parse_drop() 6292 if drop and not isinstance(drop, exp.Command): 6293 drop.set("kind", drop.args.get("kind", "COLUMN")) 6294 return drop 6295 6296 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6297 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6298 return self.expression( 6299 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6300 ) 6301 6302 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6303 index = self._index - 1 6304 6305 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6306 return self._parse_csv( 6307 lambda: self.expression( 6308 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6309 ) 6310 ) 6311 6312 self._retreat(index) 6313 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6314 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6315 6316 if self._match_text_seq("ADD", "COLUMNS"): 6317 schema = self._parse_schema() 6318 if schema: 6319 return [schema] 6320 return [] 6321 6322 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6323 6324 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6325 if self._match_texts(self.ALTER_ALTER_PARSERS): 6326 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6327 6328 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6329 # keyword after ALTER we default to parsing this statement 6330 self._match(TokenType.COLUMN) 6331 column = self._parse_field(any_token=True) 6332 6333 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6334 return self.expression(exp.AlterColumn, this=column, drop=True) 6335 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6336 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6337 if self._match(TokenType.COMMENT): 6338 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6339 if self._match_text_seq("DROP", "NOT", "NULL"): 6340 return self.expression( 6341 exp.AlterColumn, 6342 this=column, 6343 drop=True, 6344 allow_null=True, 6345 ) 6346 if self._match_text_seq("SET", "NOT", "NULL"): 6347 return self.expression( 6348 exp.AlterColumn, 6349 this=column, 6350 allow_null=False, 6351 ) 6352 self._match_text_seq("SET", "DATA") 6353 self._match_text_seq("TYPE") 6354 return self.expression( 6355 exp.AlterColumn, 6356 this=column, 6357 dtype=self._parse_types(), 6358 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6359 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6360 ) 6361 6362 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6363 if self._match_texts(("ALL", "EVEN", "AUTO")): 6364 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6365 6366 self._match_text_seq("KEY", "DISTKEY") 6367 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6368 6369 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6370 if compound: 6371 self._match_text_seq("SORTKEY") 6372 6373 if self._match(TokenType.L_PAREN, advance=False): 6374 return self.expression( 6375 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6376 ) 6377 6378 self._match_texts(("AUTO", "NONE")) 6379 return self.expression( 6380 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6381 ) 6382 6383 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6384 index = self._index - 1 6385 6386 partition_exists = self._parse_exists() 6387 if self._match(TokenType.PARTITION, advance=False): 6388 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6389 6390 self._retreat(index) 6391 return self._parse_csv(self._parse_drop_column) 6392 6393 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6394 if self._match(TokenType.COLUMN): 6395 exists = self._parse_exists() 6396 old_column = self._parse_column() 6397 to = self._match_text_seq("TO") 6398 new_column = self._parse_column() 6399 6400 if old_column is None or to is None or new_column is None: 6401 return None 6402 6403 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6404 6405 self._match_text_seq("TO") 6406 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6407 6408 def _parse_alter_table_set(self) -> exp.AlterSet: 6409 alter_set = self.expression(exp.AlterSet) 6410 6411 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6412 "TABLE", "PROPERTIES" 6413 ): 6414 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6415 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6416 alter_set.set("expressions", [self._parse_assignment()]) 6417 elif self._match_texts(("LOGGED", "UNLOGGED")): 6418 alter_set.set("option", exp.var(self._prev.text.upper())) 6419 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6420 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6421 elif self._match_text_seq("LOCATION"): 6422 alter_set.set("location", self._parse_field()) 6423 elif self._match_text_seq("ACCESS", "METHOD"): 6424 alter_set.set("access_method", self._parse_field()) 6425 elif self._match_text_seq("TABLESPACE"): 6426 alter_set.set("tablespace", self._parse_field()) 6427 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6428 alter_set.set("file_format", [self._parse_field()]) 6429 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6430 alter_set.set("file_format", self._parse_wrapped_options()) 6431 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6432 alter_set.set("copy_options", self._parse_wrapped_options()) 6433 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6434 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6435 else: 6436 if self._match_text_seq("SERDE"): 6437 alter_set.set("serde", self._parse_field()) 6438 6439 alter_set.set("expressions", [self._parse_properties()]) 6440 6441 return 
alter_set 6442 6443 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6444 start = self._prev 6445 6446 if not self._match(TokenType.TABLE): 6447 return self._parse_as_command(start) 6448 6449 exists = self._parse_exists() 6450 only = self._match_text_seq("ONLY") 6451 this = self._parse_table(schema=True) 6452 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6453 6454 if self._next: 6455 self._advance() 6456 6457 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6458 if parser: 6459 actions = ensure_list(parser(self)) 6460 options = self._parse_csv(self._parse_property) 6461 6462 if not self._curr and actions: 6463 return self.expression( 6464 exp.AlterTable, 6465 this=this, 6466 exists=exists, 6467 actions=actions, 6468 only=only, 6469 options=options, 6470 cluster=cluster, 6471 ) 6472 6473 return self._parse_as_command(start) 6474 6475 def _parse_merge(self) -> exp.Merge: 6476 self._match(TokenType.INTO) 6477 target = self._parse_table() 6478 6479 if target and self._match(TokenType.ALIAS, advance=False): 6480 target.set("alias", self._parse_table_alias()) 6481 6482 self._match(TokenType.USING) 6483 using = self._parse_table() 6484 6485 self._match(TokenType.ON) 6486 on = self._parse_assignment() 6487 6488 return self.expression( 6489 exp.Merge, 6490 this=target, 6491 using=using, 6492 on=on, 6493 expressions=self._parse_when_matched(), 6494 ) 6495 6496 def _parse_when_matched(self) -> t.List[exp.When]: 6497 whens = [] 6498 6499 while self._match(TokenType.WHEN): 6500 matched = not self._match(TokenType.NOT) 6501 self._match_text_seq("MATCHED") 6502 source = ( 6503 False 6504 if self._match_text_seq("BY", "TARGET") 6505 else self._match_text_seq("BY", "SOURCE") 6506 ) 6507 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6508 6509 self._match(TokenType.THEN) 6510 6511 if self._match(TokenType.INSERT): 6512 _this = self._parse_star() 6513 if _this: 6514 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6515 else: 6516 then = self.expression( 6517 exp.Insert, 6518 this=self._parse_value(), 6519 expression=self._match_text_seq("VALUES") and self._parse_value(), 6520 ) 6521 elif self._match(TokenType.UPDATE): 6522 expressions = self._parse_star() 6523 if expressions: 6524 then = self.expression(exp.Update, expressions=expressions) 6525 else: 6526 then = self.expression( 6527 exp.Update, 6528 expressions=self._match(TokenType.SET) 6529 and self._parse_csv(self._parse_equality), 6530 ) 6531 elif self._match(TokenType.DELETE): 6532 then = self.expression(exp.Var, this=self._prev.text) 6533 else: 6534 then = None 6535 6536 whens.append( 6537 self.expression( 6538 exp.When, 6539 matched=matched, 6540 source=source, 6541 condition=condition, 6542 then=then, 6543 ) 6544 ) 6545 return whens 6546 6547 def _parse_show(self) -> t.Optional[exp.Expression]: 6548 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6549 if parser: 6550 return parser(self) 6551 return self._parse_as_command(self._prev) 6552 6553 def _parse_set_item_assignment( 6554 self, kind: t.Optional[str] = None 6555 ) -> t.Optional[exp.Expression]: 6556 index = self._index 6557 6558 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6559 return self._parse_set_transaction(global_=kind == "GLOBAL") 6560 6561 left = self._parse_primary() or self._parse_column() 6562 assignment_delimiter = self._match_texts(("=", "TO")) 6563 6564 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not 
assignment_delimiter): 6565 self._retreat(index) 6566 return None 6567 6568 right = self._parse_statement() or self._parse_id_var() 6569 if isinstance(right, (exp.Column, exp.Identifier)): 6570 right = exp.var(right.name) 6571 6572 this = self.expression(exp.EQ, this=left, expression=right) 6573 return self.expression(exp.SetItem, this=this, kind=kind) 6574 6575 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6576 self._match_text_seq("TRANSACTION") 6577 characteristics = self._parse_csv( 6578 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6579 ) 6580 return self.expression( 6581 exp.SetItem, 6582 expressions=characteristics, 6583 kind="TRANSACTION", 6584 **{"global": global_}, # type: ignore 6585 ) 6586 6587 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6588 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6589 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6590 6591 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6592 index = self._index 6593 set_ = self.expression( 6594 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6595 ) 6596 6597 if self._curr: 6598 self._retreat(index) 6599 return self._parse_as_command(self._prev) 6600 6601 return set_ 6602 6603 def _parse_var_from_options( 6604 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6605 ) -> t.Optional[exp.Var]: 6606 start = self._curr 6607 if not start: 6608 return None 6609 6610 option = start.text.upper() 6611 continuations = options.get(option) 6612 6613 index = self._index 6614 self._advance() 6615 for keywords in continuations or []: 6616 if isinstance(keywords, str): 6617 keywords = (keywords,) 6618 6619 if self._match_text_seq(*keywords): 6620 option = f"{option} {' '.join(keywords)}" 6621 break 6622 else: 6623 if continuations or continuations is None: 6624 if raise_unmatched: 6625 self.raise_error(f"Unknown option {option}") 6626 6627 self._retreat(index) 6628 return None 6629 6630 return exp.var(option) 6631 6632 def _parse_as_command(self, start: Token) -> exp.Command: 6633 while self._curr: 6634 self._advance() 6635 text = self._find_sql(start, self._prev) 6636 size = len(start.text) 6637 self._warn_unsupported() 6638 return exp.Command(this=text[:size], expression=text[size:]) 6639 6640 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6641 settings = [] 6642 6643 self._match_l_paren() 6644 kind = self._parse_id_var() 6645 6646 if self._match(TokenType.L_PAREN): 6647 while True: 6648 key = self._parse_id_var() 6649 value = self._parse_primary() 6650 6651 if not key and value is None: 6652 break 6653 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6654 self._match(TokenType.R_PAREN) 6655 6656 self._match_r_paren() 6657 6658 return self.expression( 6659 exp.DictProperty, 6660 this=this, 6661 kind=kind.this if kind else None, 6662 settings=settings, 6663 ) 6664 6665 def _parse_dict_range(self, this: str) -> exp.DictRange: 6666 self._match_l_paren() 6667 has_min = self._match_text_seq("MIN") 6668 if has_min: 6669 min = self._parse_var() or self._parse_primary() 6670 self._match_text_seq("MAX") 6671 max = self._parse_var() or self._parse_primary() 6672 else: 6673 max = self._parse_var() or self._parse_primary() 6674 min = exp.Literal.number(0) 6675 self._match_r_paren() 6676 return self.expression(exp.DictRange, this=this, min=min, max=max) 6677 6678 def _parse_comprehension( 6679 self, this: 
t.Optional[exp.Expression] 6680 ) -> t.Optional[exp.Comprehension]: 6681 index = self._index 6682 expression = self._parse_column() 6683 if not self._match(TokenType.IN): 6684 self._retreat(index - 1) 6685 return None 6686 iterator = self._parse_column() 6687 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6688 return self.expression( 6689 exp.Comprehension, 6690 this=this, 6691 expression=expression, 6692 iterator=iterator, 6693 condition=condition, 6694 ) 6695 6696 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6697 if self._match(TokenType.HEREDOC_STRING): 6698 return self.expression(exp.Heredoc, this=self._prev.text) 6699 6700 if not self._match_text_seq("$"): 6701 return None 6702 6703 tags = ["$"] 6704 tag_text = None 6705 6706 if self._is_connected(): 6707 self._advance() 6708 tags.append(self._prev.text.upper()) 6709 else: 6710 self.raise_error("No closing $ found") 6711 6712 if tags[-1] != "$": 6713 if self._is_connected() and self._match_text_seq("$"): 6714 tag_text = tags[-1] 6715 tags.append("$") 6716 else: 6717 self.raise_error("No closing $ found") 6718 6719 heredoc_start = self._curr 6720 6721 while self._curr: 6722 if self._match_text_seq(*tags, advance=False): 6723 this = self._find_sql(heredoc_start, self._prev) 6724 self._advance(len(tags)) 6725 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6726 6727 self._advance() 6728 6729 self.raise_error(f"No closing {''.join(tags)} found") 6730 return None 6731 6732 def _find_parser( 6733 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6734 ) -> t.Optional[t.Callable]: 6735 if not self._curr: 6736 return None 6737 6738 index = self._index 6739 this = [] 6740 while True: 6741 # The current token might be multiple words 6742 curr = self._curr.text.upper() 6743 key = curr.split(" ") 6744 this.append(curr) 6745 6746 self._advance() 6747 result, trie = in_trie(trie, key) 6748 if result == TrieResult.FAILED: 6749 break 6750 6751 if result == TrieResult.EXISTS: 6752 subparser = parsers[" ".join(this)] 6753 return subparser 6754 6755 self._retreat(index) 6756 return None 6757 6758 def _match(self, token_type, advance=True, expression=None): 6759 if not self._curr: 6760 return None 6761 6762 if self._curr.token_type == token_type: 6763 if advance: 6764 self._advance() 6765 self._add_comments(expression) 6766 return True 6767 6768 return None 6769 6770 def _match_set(self, types, advance=True): 6771 if not self._curr: 6772 return None 6773 6774 if self._curr.token_type in types: 6775 if advance: 6776 self._advance() 6777 return True 6778 6779 return None 6780 6781 def _match_pair(self, token_type_a, token_type_b, advance=True): 6782 if not self._curr or not self._next: 6783 return None 6784 6785 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6786 if advance: 6787 self._advance(2) 6788 return True 6789 6790 return None 6791 6792 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6793 if not self._match(TokenType.L_PAREN, expression=expression): 6794 self.raise_error("Expecting (") 6795 6796 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6797 if not self._match(TokenType.R_PAREN, expression=expression): 6798 self.raise_error("Expecting )") 6799 6800 def _match_texts(self, texts, advance=True): 6801 if self._curr and self._curr.text.upper() in texts: 6802 if advance: 6803 self._advance() 6804 return True 6805 return None 6806 6807 def _match_text_seq(self, *texts, advance=True): 6808 index = 
self._index 6809 for text in texts: 6810 if self._curr and self._curr.text.upper() == text: 6811 self._advance() 6812 else: 6813 self._retreat(index) 6814 return None 6815 6816 if not advance: 6817 self._retreat(index) 6818 6819 return True 6820 6821 def _replace_lambda( 6822 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6823 ) -> t.Optional[exp.Expression]: 6824 if not node: 6825 return node 6826 6827 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6828 6829 for column in node.find_all(exp.Column): 6830 typ = lambda_types.get(column.parts[0].name) 6831 if typ is not None: 6832 dot_or_id = column.to_dot() if column.table else column.this 6833 6834 if typ: 6835 dot_or_id = self.expression( 6836 exp.Cast, 6837 this=dot_or_id, 6838 to=typ, 6839 ) 6840 6841 parent = column.parent 6842 6843 while isinstance(parent, exp.Dot): 6844 if not isinstance(parent.parent, exp.Dot): 6845 parent.replace(dot_or_id) 6846 break 6847 parent = parent.parent 6848 else: 6849 if column is node: 6850 node = dot_or_id 6851 else: 6852 column.replace(dot_or_id) 6853 return node 6854 6855 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6856 start = self._prev 6857 6858 # Not to be confused with TRUNCATE(number, decimals) function call 6859 if self._match(TokenType.L_PAREN): 6860 self._retreat(self._index - 2) 6861 return self._parse_function() 6862 6863 # Clickhouse supports TRUNCATE DATABASE as well 6864 is_database = self._match(TokenType.DATABASE) 6865 6866 self._match(TokenType.TABLE) 6867 6868 exists = self._parse_exists(not_=False) 6869 6870 expressions = self._parse_csv( 6871 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6872 ) 6873 6874 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6875 6876 if self._match_text_seq("RESTART", "IDENTITY"): 6877 identity = "RESTART" 6878 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6879 identity = "CONTINUE" 6880 else: 6881 identity = None 6882 6883 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6884 option = self._prev.text 6885 else: 6886 option = None 6887 6888 partition = self._parse_partition() 6889 6890 # Fallback case 6891 if self._curr: 6892 return self._parse_as_command(start) 6893 6894 return self.expression( 6895 exp.TruncateTable, 6896 expressions=expressions, 6897 is_database=is_database, 6898 exists=exists, 6899 cluster=cluster, 6900 identity=identity, 6901 option=option, 6902 partition=partition, 6903 ) 6904 6905 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6906 this = self._parse_ordered(self._parse_opclass) 6907 6908 if not self._match(TokenType.WITH): 6909 return this 6910 6911 op = self._parse_var(any_token=True) 6912 6913 return self.expression(exp.WithOperator, this=this, op=op) 6914 6915 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6916 self._match(TokenType.EQ) 6917 self._match(TokenType.L_PAREN) 6918 6919 opts: t.List[t.Optional[exp.Expression]] = [] 6920 while self._curr and not self._match(TokenType.R_PAREN): 6921 if self._match_text_seq("FORMAT_NAME", "="): 6922 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6923 # so we parse it separately to use _parse_field() 6924 prop = self.expression( 6925 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6926 ) 6927 opts.append(prop) 6928 else: 6929 opts.append(self._parse_property()) 6930 6931 self._match(TokenType.COMMA) 6932 6933 return opts 6934 6935 def 
_parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6936 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6937 6938 options = [] 6939 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6940 option = self._parse_var(any_token=True) 6941 prev = self._prev.text.upper() 6942 6943 # Different dialects might separate options and values by white space, "=" and "AS" 6944 self._match(TokenType.EQ) 6945 self._match(TokenType.ALIAS) 6946 6947 param = self.expression(exp.CopyParameter, this=option) 6948 6949 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6950 TokenType.L_PAREN, advance=False 6951 ): 6952 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6953 param.set("expressions", self._parse_wrapped_options()) 6954 elif prev == "FILE_FORMAT": 6955 # T-SQL's external file format case 6956 param.set("expression", self._parse_field()) 6957 else: 6958 param.set("expression", self._parse_unquoted_field()) 6959 6960 options.append(param) 6961 self._match(sep) 6962 6963 return options 6964 6965 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6966 expr = self.expression(exp.Credentials) 6967 6968 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6969 expr.set("storage", self._parse_field()) 6970 if self._match_text_seq("CREDENTIALS"): 6971 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6972 creds = ( 6973 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6974 ) 6975 expr.set("credentials", creds) 6976 if self._match_text_seq("ENCRYPTION"): 6977 expr.set("encryption", self._parse_wrapped_options()) 6978 if self._match_text_seq("IAM_ROLE"): 6979 expr.set("iam_role", self._parse_field()) 6980 if self._match_text_seq("REGION"): 6981 expr.set("region", self._parse_field()) 6982 6983 return expr 6984 6985 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6986 return self._parse_field() 6987 6988 def _parse_copy(self) -> exp.Copy | exp.Command: 6989 start = self._prev 6990 6991 self._match(TokenType.INTO) 6992 6993 this = ( 6994 self._parse_select(nested=True, parse_subquery_alias=False) 6995 if self._match(TokenType.L_PAREN, advance=False) 6996 else self._parse_table(schema=True) 6997 ) 6998 6999 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7000 7001 files = self._parse_csv(self._parse_file_location) 7002 credentials = self._parse_credentials() 7003 7004 self._match_text_seq("WITH") 7005 7006 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7007 7008 # Fallback case 7009 if self._curr: 7010 return self._parse_as_command(start) 7011 7012 return self.expression( 7013 exp.Copy, 7014 this=this, 7015 kind=kind, 7016 credentials=credentials, 7017 files=files, 7018 params=params, 7019 )
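
A pattern that recurs throughout the methods above is speculative matching: the parser saves self._index, consumes tokens greedily, and calls self._retreat(index) as soon as a required keyword sequence fails to materialize (see _match_text_seq, _parse_auto_property, _parse_next_value_for, and many others). The snippet below is a minimal standalone sketch of that idea, not sqlglot code; it operates on a plain list of token texts:

    import typing as t

    def match_text_seq(tokens: t.List[str], pos: int, *texts: str) -> t.Tuple[int, bool]:
        # Remember where we started so a partial match can be undone ("retreat").
        start = pos
        for text in texts:
            if pos < len(tokens) and tokens[pos].upper() == text:
                pos += 1  # advance past the matched keyword
            else:
                return start, False  # rewind: the caller sees an unchanged position
        return pos, True

    tokens = ["WITHIN", "GROUP", "(", "ORDER", "BY", "x", ")"]
    assert match_text_seq(tokens, 0, "WITHIN", "GROUP") == (2, True)
    assert match_text_seq(tokens, 0, "WITHIN", "GROUPING") == (0, False)

Because failed matches always restore the index, the higher-level parse methods can probe several alternatives in sequence without corrupting the token stream.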
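
The docstring of _parse_decode can be exercised end to end: given search/result pairs the builder emits a CASE expression, and a NULL search value becomes an explicit IS NULL test instead of an equality. A small illustration (the table and column names are made up, and the exact rendering may vary between sqlglot versions):

    import sqlglot

    sql = "SELECT DECODE(status, 1, 'active', NULL, 'unknown', 'other') FROM t"
    case = sqlglot.parse_one(sql, read="oracle")

    # Expected shape, roughly:
    # SELECT CASE WHEN status = 1 THEN 'active'
    #             WHEN status IS NULL THEN 'unknown' ELSE 'other' END FROM t
    print(case.sql())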
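
Parsing WITHIN GROUP manually in _parse_string_agg, rather than as an exp.WithinGroup wrapper, is what lets a T-SQL style STRING_AGG transpile cleanly to dialects that use GROUP_CONCAT. A hedged sketch, with invented identifiers and indicative output:

    import sqlglot

    print(
        sqlglot.transpile(
            "SELECT STRING_AGG(name, ',') WITHIN GROUP (ORDER BY name) FROM t",
            read="tsql",
            write="mysql",
        )[0]
    )
    # e.g. SELECT GROUP_CONCAT(name ORDER BY name SEPARATOR ',') FROM t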
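
The IGNORE/RESPECT NULLS handling in _parse_window normalizes both placements that Oracle and Snowflake accept, so downstream code only ever sees the wrapper outside the aggregate. A quick check, assuming the default dialect wires up both forms as described in the comments above (identifiers invented):

    import sqlglot
    from sqlglot import exp

    inside = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
    outside = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")

    # Both placements should yield an exp.IgnoreNulls node wrapping the aggregate.
    assert inside.find(exp.IgnoreNulls) is not None
    assert outside.find(exp.IgnoreNulls) is not None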
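
_parse_window_spec returns a plain {"value", "side"} dict for each frame bound, which _parse_window then folds into an exp.WindowSpec. The mapping is easy to see on a concrete query (identifiers invented for illustration):

    import sqlglot
    from sqlglot import exp

    q = sqlglot.parse_one(
        "SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    )
    spec = q.find(exp.Window).args["spec"]

    # kind='ROWS'; start='UNBOUNDED' with side='PRECEDING'; end='CURRENT ROW' with no side.
    print(spec.args["kind"], spec.args["start"], spec.args["start_side"], spec.args["end"])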
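
_parse_merge and _parse_when_matched cooperate to turn each WHEN branch into an exp.When carrying its matched/source/condition/then arguments. A sketch against the version shown here, with hypothetical tables t and s:

    import sqlglot
    from sqlglot import exp

    merge = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    assert isinstance(merge, exp.Merge)
    assert all(isinstance(w, exp.When) for w in merge.expressions)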
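
_parse_truncate_table records the RESTART/CONTINUE IDENTITY choice and the CASCADE/RESTRICT option as plain strings on the expression. For example, using Postgres syntax and an invented table name:

    import sqlglot
    from sqlglot import exp

    stmt = sqlglot.parse_one("TRUNCATE TABLE t RESTART IDENTITY CASCADE", read="postgres")
    assert isinstance(stmt, exp.TruncateTable)
    print(stmt.args.get("identity"), stmt.args.get("option"))  # RESTART CASCADE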
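
_parse_alter dispatches on the keyword following ALTER TABLE via ALTER_PARSERS, so a rename lands in _parse_alter_table_rename and comes back as an exp.RenameColumn action. A sketch with invented names, against the exp.AlterTable node this version produces:

    import sqlglot
    from sqlglot import exp

    alter = sqlglot.parse_one("ALTER TABLE t RENAME COLUMN a TO b")
    assert isinstance(alter, exp.AlterTable)
    assert isinstance(alter.args["actions"][0], exp.RenameColumn)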
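
Finally, _parse_transaction and _parse_commit_or_rollback back the simple transaction-control statements; on the default dialect they should produce the following node types (indicative only, since some dialects override BEGIN):

    import sqlglot

    print(repr(sqlglot.parse_one("BEGIN")))                     # expected: Transaction()
    print(repr(sqlglot.parse_one("COMMIT")))                    # expected: Commit()
    print(repr(sqlglot.parse_one("ROLLBACK TO SAVEPOINT sp")))  # expected: Rollback(savepoint=...)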
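_parse_truncate_table also shows the two-token retreat trick used to disambiguate the TRUNCATE statement from the TRUNCATE(number, decimals) function. A short sketch of both paths; the statements are illustrative:

import sqlglot
from sqlglot import exp

# Statement form: captured as exp.TruncateTable with its options.
stmt = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE", read="postgres")
assert isinstance(stmt, exp.TruncateTable)
assert stmt.args["identity"] == "RESTART"

# Function form: the lookahead for '(' retreats and re-parses a function call.
expr = sqlglot.parse_one("SELECT TRUNCATE(123.456, 2)")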
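Note the shared "fallback case" at the end of _parse_truncate_table and _parse_copy: if any tokens remain unconsumed, the statement is re-parsed as an opaque exp.Command (with a warning logged) rather than returning a half-built tree. A sketch, with a deliberately unsupported trailing clause:

import sqlglot
from sqlglot import exp

# The trailing tokens leave self._curr set, so _parse_truncate_table bails
# out through _parse_as_command and returns a generic Command node.
stmt = sqlglot.parse_one("TRUNCATE TABLE t SOME UNSUPPORTED CLAUSE")
assert isinstance(stmt, exp.Command)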
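_parse_with_operator handles the `<column> WITH <operator>` form that appears, for example, in Postgres EXCLUDE constraints. A hedged sketch; whether this round-trips exactly depends on the dialect and sqlglot version:

import sqlglot
from sqlglot import exp

ddl = "CREATE TABLE r (during TSRANGE, EXCLUDE USING gist (during WITH &&))"
tree = sqlglot.parse_one(ddl, read="postgres")
print(tree.find(exp.WithOperator))  # the `during WITH &&` element, if parsed as such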
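_parse_copy ties the helpers together: the target (a table, or a parenthesized subquery), the direction (kind is True for COPY ... FROM and False for COPY ... TO), the file list, the credentials, and the options collected by _parse_copy_parameters. A sketch using Snowflake-style syntax; the integration and format names are invented:

import sqlglot
from sqlglot import exp

sql = """
COPY INTO mytable
FROM 's3://mybucket/load/'
STORAGE_INTEGRATION = my_integration
FILE_FORMAT = (FORMAT_NAME = my_csv_format)
"""

stmt = sqlglot.parse_one(sql, read="snowflake")
assert isinstance(stmt, exp.Copy)
print(stmt.args["kind"])         # True, i.e. COPY ... FROM
print(stmt.args["credentials"])  # exp.Credentials carrying the storage integration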
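_parse_credentials is written as a series of independent matches, which is how Snowflake's CREDENTIALS = (...) and Redshift's bare CREDENTIALS '<string>' or IAM_ROLE '<arn>' forms all land in the same exp.Credentials node. A sketch with Redshift syntax; the bucket and role ARN are invented:

import sqlglot
from sqlglot import exp

stmt = sqlglot.parse_one(
    "COPY sales FROM 's3://mybucket/sales/' IAM_ROLE 'arn:aws:iam::123456789012:role/loader'",
    read="redshift",
)
creds = stmt.args["credentials"]
assert isinstance(creds, exp.Credentials)
print(creds.args["iam_role"])  # the role ARN as a string literal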
131class Parser(metaclass=_Parser): 132 """ 133 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 134 135 Args: 136 error_level: The desired error level. 137 Default: ErrorLevel.IMMEDIATE 138 error_message_context: The amount of context to capture from a query string when displaying 139 the error message (in number of characters). 140 Default: 100 141 max_errors: Maximum number of error messages to include in a raised ParseError. 142 This is only relevant if error_level is ErrorLevel.RAISE. 143 Default: 3 144 """ 145 146 FUNCTIONS: t.Dict[str, t.Callable] = { 147 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 148 "CONCAT": lambda args, dialect: exp.Concat( 149 expressions=args, 150 safe=not dialect.STRICT_STRING_CONCAT, 151 coalesce=dialect.CONCAT_COALESCE, 152 ), 153 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 154 expressions=args, 155 safe=not dialect.STRICT_STRING_CONCAT, 156 coalesce=dialect.CONCAT_COALESCE, 157 ), 158 "DATE_TO_DATE_STR": lambda args: exp.Cast( 159 this=seq_get(args, 0), 160 to=exp.DataType(this=exp.DataType.Type.TEXT), 161 ), 162 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 163 "HEX": build_hex, 164 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 165 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 166 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 167 "LIKE": build_like, 168 "LOG": build_logarithm, 169 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 170 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 171 "LOWER": build_lower, 172 "LPAD": lambda args: build_pad(args), 173 "LEFTPAD": lambda args: build_pad(args), 174 "MOD": build_mod, 175 "RPAD": lambda args: build_pad(args, is_left=False), 176 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 177 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 178 if len(args) != 2 179 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 180 "TIME_TO_TIME_STR": lambda args: exp.Cast( 181 this=seq_get(args, 0), 182 to=exp.DataType(this=exp.DataType.Type.TEXT), 183 ), 184 "TO_HEX": build_hex, 185 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 186 this=exp.Cast( 187 this=seq_get(args, 0), 188 to=exp.DataType(this=exp.DataType.Type.TEXT), 189 ), 190 start=exp.Literal.number(1), 191 length=exp.Literal.number(10), 192 ), 193 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 194 "UPPER": build_upper, 195 "VAR_MAP": build_var_map, 196 } 197 198 NO_PAREN_FUNCTIONS = { 199 TokenType.CURRENT_DATE: exp.CurrentDate, 200 TokenType.CURRENT_DATETIME: exp.CurrentDate, 201 TokenType.CURRENT_TIME: exp.CurrentTime, 202 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 203 TokenType.CURRENT_USER: exp.CurrentUser, 204 } 205 206 STRUCT_TYPE_TOKENS = { 207 TokenType.NESTED, 208 TokenType.OBJECT, 209 TokenType.STRUCT, 210 } 211 212 NESTED_TYPE_TOKENS = { 213 TokenType.ARRAY, 214 TokenType.LIST, 215 TokenType.LOWCARDINALITY, 216 TokenType.MAP, 217 TokenType.NULLABLE, 218 *STRUCT_TYPE_TOKENS, 219 } 220 221 ENUM_TYPE_TOKENS = { 222 TokenType.ENUM, 223 TokenType.ENUM8, 224 TokenType.ENUM16, 225 } 226 227 AGGREGATE_TYPE_TOKENS = { 228 TokenType.AGGREGATEFUNCTION, 229 TokenType.SIMPLEAGGREGATEFUNCTION, 230 } 231 232 TYPE_TOKENS = { 233 TokenType.BIT, 234 TokenType.BOOLEAN, 235 TokenType.TINYINT, 236 
TokenType.UTINYINT, 237 TokenType.SMALLINT, 238 TokenType.USMALLINT, 239 TokenType.INT, 240 TokenType.UINT, 241 TokenType.BIGINT, 242 TokenType.UBIGINT, 243 TokenType.INT128, 244 TokenType.UINT128, 245 TokenType.INT256, 246 TokenType.UINT256, 247 TokenType.MEDIUMINT, 248 TokenType.UMEDIUMINT, 249 TokenType.FIXEDSTRING, 250 TokenType.FLOAT, 251 TokenType.DOUBLE, 252 TokenType.CHAR, 253 TokenType.NCHAR, 254 TokenType.VARCHAR, 255 TokenType.NVARCHAR, 256 TokenType.BPCHAR, 257 TokenType.TEXT, 258 TokenType.MEDIUMTEXT, 259 TokenType.LONGTEXT, 260 TokenType.MEDIUMBLOB, 261 TokenType.LONGBLOB, 262 TokenType.BINARY, 263 TokenType.VARBINARY, 264 TokenType.JSON, 265 TokenType.JSONB, 266 TokenType.INTERVAL, 267 TokenType.TINYBLOB, 268 TokenType.TINYTEXT, 269 TokenType.TIME, 270 TokenType.TIMETZ, 271 TokenType.TIMESTAMP, 272 TokenType.TIMESTAMP_S, 273 TokenType.TIMESTAMP_MS, 274 TokenType.TIMESTAMP_NS, 275 TokenType.TIMESTAMPTZ, 276 TokenType.TIMESTAMPLTZ, 277 TokenType.TIMESTAMPNTZ, 278 TokenType.DATETIME, 279 TokenType.DATETIME64, 280 TokenType.DATE, 281 TokenType.DATE32, 282 TokenType.INT4RANGE, 283 TokenType.INT4MULTIRANGE, 284 TokenType.INT8RANGE, 285 TokenType.INT8MULTIRANGE, 286 TokenType.NUMRANGE, 287 TokenType.NUMMULTIRANGE, 288 TokenType.TSRANGE, 289 TokenType.TSMULTIRANGE, 290 TokenType.TSTZRANGE, 291 TokenType.TSTZMULTIRANGE, 292 TokenType.DATERANGE, 293 TokenType.DATEMULTIRANGE, 294 TokenType.DECIMAL, 295 TokenType.UDECIMAL, 296 TokenType.BIGDECIMAL, 297 TokenType.UUID, 298 TokenType.GEOGRAPHY, 299 TokenType.GEOMETRY, 300 TokenType.HLLSKETCH, 301 TokenType.HSTORE, 302 TokenType.PSEUDO_TYPE, 303 TokenType.SUPER, 304 TokenType.SERIAL, 305 TokenType.SMALLSERIAL, 306 TokenType.BIGSERIAL, 307 TokenType.XML, 308 TokenType.YEAR, 309 TokenType.UNIQUEIDENTIFIER, 310 TokenType.USERDEFINED, 311 TokenType.MONEY, 312 TokenType.SMALLMONEY, 313 TokenType.ROWVERSION, 314 TokenType.IMAGE, 315 TokenType.VARIANT, 316 TokenType.VECTOR, 317 TokenType.OBJECT, 318 TokenType.OBJECT_IDENTIFIER, 319 TokenType.INET, 320 TokenType.IPADDRESS, 321 TokenType.IPPREFIX, 322 TokenType.IPV4, 323 TokenType.IPV6, 324 TokenType.UNKNOWN, 325 TokenType.NULL, 326 TokenType.NAME, 327 TokenType.TDIGEST, 328 *ENUM_TYPE_TOKENS, 329 *NESTED_TYPE_TOKENS, 330 *AGGREGATE_TYPE_TOKENS, 331 } 332 333 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 334 TokenType.BIGINT: TokenType.UBIGINT, 335 TokenType.INT: TokenType.UINT, 336 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 337 TokenType.SMALLINT: TokenType.USMALLINT, 338 TokenType.TINYINT: TokenType.UTINYINT, 339 TokenType.DECIMAL: TokenType.UDECIMAL, 340 } 341 342 SUBQUERY_PREDICATES = { 343 TokenType.ANY: exp.Any, 344 TokenType.ALL: exp.All, 345 TokenType.EXISTS: exp.Exists, 346 TokenType.SOME: exp.Any, 347 } 348 349 RESERVED_TOKENS = { 350 *Tokenizer.SINGLE_TOKENS.values(), 351 TokenType.SELECT, 352 } - {TokenType.IDENTIFIER} 353 354 DB_CREATABLES = { 355 TokenType.DATABASE, 356 TokenType.DICTIONARY, 357 TokenType.MODEL, 358 TokenType.SCHEMA, 359 TokenType.SEQUENCE, 360 TokenType.STORAGE_INTEGRATION, 361 TokenType.TABLE, 362 TokenType.TAG, 363 TokenType.VIEW, 364 TokenType.WAREHOUSE, 365 TokenType.STREAMLIT, 366 } 367 368 CREATABLES = { 369 TokenType.COLUMN, 370 TokenType.CONSTRAINT, 371 TokenType.FOREIGN_KEY, 372 TokenType.FUNCTION, 373 TokenType.INDEX, 374 TokenType.PROCEDURE, 375 *DB_CREATABLES, 376 } 377 378 # Tokens that can represent identifiers 379 ID_VAR_TOKENS = { 380 TokenType.ALL, 381 TokenType.VAR, 382 TokenType.ANTI, 383 TokenType.APPLY, 384 TokenType.ASC, 385 TokenType.ASOF, 386 
TokenType.AUTO_INCREMENT, 387 TokenType.BEGIN, 388 TokenType.BPCHAR, 389 TokenType.CACHE, 390 TokenType.CASE, 391 TokenType.COLLATE, 392 TokenType.COMMAND, 393 TokenType.COMMENT, 394 TokenType.COMMIT, 395 TokenType.CONSTRAINT, 396 TokenType.COPY, 397 TokenType.CUBE, 398 TokenType.DEFAULT, 399 TokenType.DELETE, 400 TokenType.DESC, 401 TokenType.DESCRIBE, 402 TokenType.DICTIONARY, 403 TokenType.DIV, 404 TokenType.END, 405 TokenType.EXECUTE, 406 TokenType.ESCAPE, 407 TokenType.FALSE, 408 TokenType.FIRST, 409 TokenType.FILTER, 410 TokenType.FINAL, 411 TokenType.FORMAT, 412 TokenType.FULL, 413 TokenType.IDENTIFIER, 414 TokenType.IS, 415 TokenType.ISNULL, 416 TokenType.INTERVAL, 417 TokenType.KEEP, 418 TokenType.KILL, 419 TokenType.LEFT, 420 TokenType.LOAD, 421 TokenType.MERGE, 422 TokenType.NATURAL, 423 TokenType.NEXT, 424 TokenType.OFFSET, 425 TokenType.OPERATOR, 426 TokenType.ORDINALITY, 427 TokenType.OVERLAPS, 428 TokenType.OVERWRITE, 429 TokenType.PARTITION, 430 TokenType.PERCENT, 431 TokenType.PIVOT, 432 TokenType.PRAGMA, 433 TokenType.RANGE, 434 TokenType.RECURSIVE, 435 TokenType.REFERENCES, 436 TokenType.REFRESH, 437 TokenType.REPLACE, 438 TokenType.RIGHT, 439 TokenType.ROLLUP, 440 TokenType.ROW, 441 TokenType.ROWS, 442 TokenType.SEMI, 443 TokenType.SET, 444 TokenType.SETTINGS, 445 TokenType.SHOW, 446 TokenType.TEMPORARY, 447 TokenType.TOP, 448 TokenType.TRUE, 449 TokenType.TRUNCATE, 450 TokenType.UNIQUE, 451 TokenType.UNNEST, 452 TokenType.UNPIVOT, 453 TokenType.UPDATE, 454 TokenType.USE, 455 TokenType.VOLATILE, 456 TokenType.WINDOW, 457 *CREATABLES, 458 *SUBQUERY_PREDICATES, 459 *TYPE_TOKENS, 460 *NO_PAREN_FUNCTIONS, 461 } 462 463 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 464 465 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 466 TokenType.ANTI, 467 TokenType.APPLY, 468 TokenType.ASOF, 469 TokenType.FULL, 470 TokenType.LEFT, 471 TokenType.LOCK, 472 TokenType.NATURAL, 473 TokenType.OFFSET, 474 TokenType.RIGHT, 475 TokenType.SEMI, 476 TokenType.WINDOW, 477 } 478 479 ALIAS_TOKENS = ID_VAR_TOKENS 480 481 ARRAY_CONSTRUCTORS = { 482 "ARRAY": exp.Array, 483 "LIST": exp.List, 484 } 485 486 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 487 488 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 489 490 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 491 492 FUNC_TOKENS = { 493 TokenType.COLLATE, 494 TokenType.COMMAND, 495 TokenType.CURRENT_DATE, 496 TokenType.CURRENT_DATETIME, 497 TokenType.CURRENT_TIMESTAMP, 498 TokenType.CURRENT_TIME, 499 TokenType.CURRENT_USER, 500 TokenType.FILTER, 501 TokenType.FIRST, 502 TokenType.FORMAT, 503 TokenType.GLOB, 504 TokenType.IDENTIFIER, 505 TokenType.INDEX, 506 TokenType.ISNULL, 507 TokenType.ILIKE, 508 TokenType.INSERT, 509 TokenType.LIKE, 510 TokenType.MERGE, 511 TokenType.OFFSET, 512 TokenType.PRIMARY_KEY, 513 TokenType.RANGE, 514 TokenType.REPLACE, 515 TokenType.RLIKE, 516 TokenType.ROW, 517 TokenType.UNNEST, 518 TokenType.VAR, 519 TokenType.LEFT, 520 TokenType.RIGHT, 521 TokenType.SEQUENCE, 522 TokenType.DATE, 523 TokenType.DATETIME, 524 TokenType.TABLE, 525 TokenType.TIMESTAMP, 526 TokenType.TIMESTAMPTZ, 527 TokenType.TRUNCATE, 528 TokenType.WINDOW, 529 TokenType.XOR, 530 *TYPE_TOKENS, 531 *SUBQUERY_PREDICATES, 532 } 533 534 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 535 TokenType.AND: exp.And, 536 } 537 538 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 539 TokenType.COLON_EQ: exp.PropertyEQ, 540 } 541 542 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 543 TokenType.OR: exp.Or, 544 } 545 546 
EQUALITY = { 547 TokenType.EQ: exp.EQ, 548 TokenType.NEQ: exp.NEQ, 549 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 550 } 551 552 COMPARISON = { 553 TokenType.GT: exp.GT, 554 TokenType.GTE: exp.GTE, 555 TokenType.LT: exp.LT, 556 TokenType.LTE: exp.LTE, 557 } 558 559 BITWISE = { 560 TokenType.AMP: exp.BitwiseAnd, 561 TokenType.CARET: exp.BitwiseXor, 562 TokenType.PIPE: exp.BitwiseOr, 563 } 564 565 TERM = { 566 TokenType.DASH: exp.Sub, 567 TokenType.PLUS: exp.Add, 568 TokenType.MOD: exp.Mod, 569 TokenType.COLLATE: exp.Collate, 570 } 571 572 FACTOR = { 573 TokenType.DIV: exp.IntDiv, 574 TokenType.LR_ARROW: exp.Distance, 575 TokenType.SLASH: exp.Div, 576 TokenType.STAR: exp.Mul, 577 } 578 579 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 580 581 TIMES = { 582 TokenType.TIME, 583 TokenType.TIMETZ, 584 } 585 586 TIMESTAMPS = { 587 TokenType.TIMESTAMP, 588 TokenType.TIMESTAMPTZ, 589 TokenType.TIMESTAMPLTZ, 590 *TIMES, 591 } 592 593 SET_OPERATIONS = { 594 TokenType.UNION, 595 TokenType.INTERSECT, 596 TokenType.EXCEPT, 597 } 598 599 JOIN_METHODS = { 600 TokenType.ASOF, 601 TokenType.NATURAL, 602 TokenType.POSITIONAL, 603 } 604 605 JOIN_SIDES = { 606 TokenType.LEFT, 607 TokenType.RIGHT, 608 TokenType.FULL, 609 } 610 611 JOIN_KINDS = { 612 TokenType.ANTI, 613 TokenType.CROSS, 614 TokenType.INNER, 615 TokenType.OUTER, 616 TokenType.SEMI, 617 TokenType.STRAIGHT_JOIN, 618 } 619 620 JOIN_HINTS: t.Set[str] = set() 621 622 LAMBDAS = { 623 TokenType.ARROW: lambda self, expressions: self.expression( 624 exp.Lambda, 625 this=self._replace_lambda( 626 self._parse_assignment(), 627 expressions, 628 ), 629 expressions=expressions, 630 ), 631 TokenType.FARROW: lambda self, expressions: self.expression( 632 exp.Kwarg, 633 this=exp.var(expressions[0].name), 634 expression=self._parse_assignment(), 635 ), 636 } 637 638 COLUMN_OPERATORS = { 639 TokenType.DOT: None, 640 TokenType.DCOLON: lambda self, this, to: self.expression( 641 exp.Cast if self.STRICT_CAST else exp.TryCast, 642 this=this, 643 to=to, 644 ), 645 TokenType.ARROW: lambda self, this, path: self.expression( 646 exp.JSONExtract, 647 this=this, 648 expression=self.dialect.to_json_path(path), 649 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 650 ), 651 TokenType.DARROW: lambda self, this, path: self.expression( 652 exp.JSONExtractScalar, 653 this=this, 654 expression=self.dialect.to_json_path(path), 655 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 656 ), 657 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 658 exp.JSONBExtract, 659 this=this, 660 expression=path, 661 ), 662 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 663 exp.JSONBExtractScalar, 664 this=this, 665 expression=path, 666 ), 667 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 668 exp.JSONBContains, 669 this=this, 670 expression=key, 671 ), 672 } 673 674 EXPRESSION_PARSERS = { 675 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 676 exp.Column: lambda self: self._parse_column(), 677 exp.Condition: lambda self: self._parse_assignment(), 678 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 679 exp.Expression: lambda self: self._parse_expression(), 680 exp.From: lambda self: self._parse_from(joins=True), 681 exp.Group: lambda self: self._parse_group(), 682 exp.Having: lambda self: self._parse_having(), 683 exp.Identifier: lambda self: self._parse_id_var(), 684 exp.Join: lambda self: self._parse_join(), 685 exp.Lambda: lambda self: self._parse_lambda(), 686 
exp.Lateral: lambda self: self._parse_lateral(), 687 exp.Limit: lambda self: self._parse_limit(), 688 exp.Offset: lambda self: self._parse_offset(), 689 exp.Order: lambda self: self._parse_order(), 690 exp.Ordered: lambda self: self._parse_ordered(), 691 exp.Properties: lambda self: self._parse_properties(), 692 exp.Qualify: lambda self: self._parse_qualify(), 693 exp.Returning: lambda self: self._parse_returning(), 694 exp.Select: lambda self: self._parse_select(), 695 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 696 exp.Table: lambda self: self._parse_table_parts(), 697 exp.TableAlias: lambda self: self._parse_table_alias(), 698 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 699 exp.Where: lambda self: self._parse_where(), 700 exp.Window: lambda self: self._parse_named_window(), 701 exp.With: lambda self: self._parse_with(), 702 "JOIN_TYPE": lambda self: self._parse_join_parts(), 703 } 704 705 STATEMENT_PARSERS = { 706 TokenType.ALTER: lambda self: self._parse_alter(), 707 TokenType.BEGIN: lambda self: self._parse_transaction(), 708 TokenType.CACHE: lambda self: self._parse_cache(), 709 TokenType.COMMENT: lambda self: self._parse_comment(), 710 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 711 TokenType.COPY: lambda self: self._parse_copy(), 712 TokenType.CREATE: lambda self: self._parse_create(), 713 TokenType.DELETE: lambda self: self._parse_delete(), 714 TokenType.DESC: lambda self: self._parse_describe(), 715 TokenType.DESCRIBE: lambda self: self._parse_describe(), 716 TokenType.DROP: lambda self: self._parse_drop(), 717 TokenType.INSERT: lambda self: self._parse_insert(), 718 TokenType.KILL: lambda self: self._parse_kill(), 719 TokenType.LOAD: lambda self: self._parse_load(), 720 TokenType.MERGE: lambda self: self._parse_merge(), 721 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 722 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 723 TokenType.REFRESH: lambda self: self._parse_refresh(), 724 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 725 TokenType.SET: lambda self: self._parse_set(), 726 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 727 TokenType.UNCACHE: lambda self: self._parse_uncache(), 728 TokenType.UPDATE: lambda self: self._parse_update(), 729 TokenType.USE: lambda self: self.expression( 730 exp.Use, 731 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 732 this=self._parse_table(schema=False), 733 ), 734 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 735 } 736 737 UNARY_PARSERS = { 738 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 739 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 740 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 741 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 742 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 743 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 744 } 745 746 STRING_PARSERS = { 747 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 748 exp.RawString, this=token.text 749 ), 750 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 751 exp.National, this=token.text 752 ), 753 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 754 TokenType.STRING: lambda 
self, token: self.expression( 755 exp.Literal, this=token.text, is_string=True 756 ), 757 TokenType.UNICODE_STRING: lambda self, token: self.expression( 758 exp.UnicodeString, 759 this=token.text, 760 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 761 ), 762 } 763 764 NUMERIC_PARSERS = { 765 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 766 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 767 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 768 TokenType.NUMBER: lambda self, token: self.expression( 769 exp.Literal, this=token.text, is_string=False 770 ), 771 } 772 773 PRIMARY_PARSERS = { 774 **STRING_PARSERS, 775 **NUMERIC_PARSERS, 776 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 777 TokenType.NULL: lambda self, _: self.expression(exp.Null), 778 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 779 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 780 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 781 TokenType.STAR: lambda self, _: self.expression( 782 exp.Star, 783 **{ 784 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 785 "replace": self._parse_star_op("REPLACE"), 786 "rename": self._parse_star_op("RENAME"), 787 }, 788 ), 789 } 790 791 PLACEHOLDER_PARSERS = { 792 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 793 TokenType.PARAMETER: lambda self: self._parse_parameter(), 794 TokenType.COLON: lambda self: ( 795 self.expression(exp.Placeholder, this=self._prev.text) 796 if self._match_set(self.ID_VAR_TOKENS) 797 else None 798 ), 799 } 800 801 RANGE_PARSERS = { 802 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 803 TokenType.GLOB: binary_range_parser(exp.Glob), 804 TokenType.ILIKE: binary_range_parser(exp.ILike), 805 TokenType.IN: lambda self, this: self._parse_in(this), 806 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 807 TokenType.IS: lambda self, this: self._parse_is(this), 808 TokenType.LIKE: binary_range_parser(exp.Like), 809 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 810 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 811 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 812 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 813 } 814 815 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 816 "ALLOWED_VALUES": lambda self: self.expression( 817 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 818 ), 819 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 820 "AUTO": lambda self: self._parse_auto_property(), 821 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 822 "BACKUP": lambda self: self.expression( 823 exp.BackupProperty, this=self._parse_var(any_token=True) 824 ), 825 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 826 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 827 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 828 "CHECKSUM": lambda self: self._parse_checksum(), 829 "CLUSTER BY": lambda self: self._parse_cluster(), 830 "CLUSTERED": lambda self: self._parse_clustered_by(), 831 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 832 exp.CollateProperty, **kwargs 833 ), 834 "COMMENT": lambda self: 
self._parse_property_assignment(exp.SchemaCommentProperty), 835 "CONTAINS": lambda self: self._parse_contains_property(), 836 "COPY": lambda self: self._parse_copy_property(), 837 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 838 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 839 "DEFINER": lambda self: self._parse_definer(), 840 "DETERMINISTIC": lambda self: self.expression( 841 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 842 ), 843 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 844 "DISTKEY": lambda self: self._parse_distkey(), 845 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 846 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 847 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 848 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 849 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 850 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 851 "FREESPACE": lambda self: self._parse_freespace(), 852 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 853 "HEAP": lambda self: self.expression(exp.HeapProperty), 854 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 855 "IMMUTABLE": lambda self: self.expression( 856 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 857 ), 858 "INHERITS": lambda self: self.expression( 859 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 860 ), 861 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 862 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 863 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 864 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 865 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 866 "LIKE": lambda self: self._parse_create_like(), 867 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 868 "LOCK": lambda self: self._parse_locking(), 869 "LOCKING": lambda self: self._parse_locking(), 870 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 871 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 872 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 873 "MODIFIES": lambda self: self._parse_modifies_property(), 874 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 875 "NO": lambda self: self._parse_no_property(), 876 "ON": lambda self: self._parse_on_property(), 877 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 878 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 879 "PARTITION": lambda self: self._parse_partitioned_of(), 880 "PARTITION BY": lambda self: self._parse_partitioned_by(), 881 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 882 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 883 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 884 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 885 "READS": lambda self: self._parse_reads_property(), 886 "REMOTE": lambda self: self._parse_remote_with_connection(), 887 "RETURNS": lambda self: self._parse_returns(), 888 "STRICT": lambda self: self.expression(exp.StrictProperty), 889 "ROW": lambda self: self._parse_row(), 890 "ROW_FORMAT": lambda self: 
self._parse_property_assignment(exp.RowFormatProperty), 891 "SAMPLE": lambda self: self.expression( 892 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 893 ), 894 "SECURE": lambda self: self.expression(exp.SecureProperty), 895 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 896 "SETTINGS": lambda self: self.expression( 897 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 898 ), 899 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 900 "SORTKEY": lambda self: self._parse_sortkey(), 901 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 902 "STABLE": lambda self: self.expression( 903 exp.StabilityProperty, this=exp.Literal.string("STABLE") 904 ), 905 "STORED": lambda self: self._parse_stored(), 906 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 907 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 908 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 909 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 910 "TO": lambda self: self._parse_to_table(), 911 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 912 "TRANSFORM": lambda self: self.expression( 913 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 914 ), 915 "TTL": lambda self: self._parse_ttl(), 916 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 917 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 918 "VOLATILE": lambda self: self._parse_volatile_property(), 919 "WITH": lambda self: self._parse_with_property(), 920 } 921 922 CONSTRAINT_PARSERS = { 923 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 924 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 925 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 926 "CHARACTER SET": lambda self: self.expression( 927 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 928 ), 929 "CHECK": lambda self: self.expression( 930 exp.CheckColumnConstraint, 931 this=self._parse_wrapped(self._parse_assignment), 932 enforced=self._match_text_seq("ENFORCED"), 933 ), 934 "COLLATE": lambda self: self.expression( 935 exp.CollateColumnConstraint, 936 this=self._parse_identifier() or self._parse_column(), 937 ), 938 "COMMENT": lambda self: self.expression( 939 exp.CommentColumnConstraint, this=self._parse_string() 940 ), 941 "COMPRESS": lambda self: self._parse_compress(), 942 "CLUSTERED": lambda self: self.expression( 943 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 944 ), 945 "NONCLUSTERED": lambda self: self.expression( 946 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 947 ), 948 "DEFAULT": lambda self: self.expression( 949 exp.DefaultColumnConstraint, this=self._parse_bitwise() 950 ), 951 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 952 "EPHEMERAL": lambda self: self.expression( 953 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 954 ), 955 "EXCLUDE": lambda self: self.expression( 956 exp.ExcludeColumnConstraint, this=self._parse_index_params() 957 ), 958 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 959 "FORMAT": lambda self: self.expression( 960 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 961 ), 962 "GENERATED": lambda self: self._parse_generated_as_identity(), 963 "IDENTITY": lambda self: 
self._parse_auto_increment(), 964 "INLINE": lambda self: self._parse_inline(), 965 "LIKE": lambda self: self._parse_create_like(), 966 "NOT": lambda self: self._parse_not_constraint(), 967 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 968 "ON": lambda self: ( 969 self._match(TokenType.UPDATE) 970 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 971 ) 972 or self.expression(exp.OnProperty, this=self._parse_id_var()), 973 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 974 "PERIOD": lambda self: self._parse_period_for_system_time(), 975 "PRIMARY KEY": lambda self: self._parse_primary_key(), 976 "REFERENCES": lambda self: self._parse_references(match=False), 977 "TITLE": lambda self: self.expression( 978 exp.TitleColumnConstraint, this=self._parse_var_or_string() 979 ), 980 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 981 "UNIQUE": lambda self: self._parse_unique(), 982 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 983 "WITH": lambda self: self.expression( 984 exp.Properties, expressions=self._parse_wrapped_properties() 985 ), 986 } 987 988 ALTER_PARSERS = { 989 "ADD": lambda self: self._parse_alter_table_add(), 990 "ALTER": lambda self: self._parse_alter_table_alter(), 991 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 992 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 993 "DROP": lambda self: self._parse_alter_table_drop(), 994 "RENAME": lambda self: self._parse_alter_table_rename(), 995 "SET": lambda self: self._parse_alter_table_set(), 996 } 997 998 ALTER_ALTER_PARSERS = { 999 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1000 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1001 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1002 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1003 } 1004 1005 SCHEMA_UNNAMED_CONSTRAINTS = { 1006 "CHECK", 1007 "EXCLUDE", 1008 "FOREIGN KEY", 1009 "LIKE", 1010 "PERIOD", 1011 "PRIMARY KEY", 1012 "UNIQUE", 1013 } 1014 1015 NO_PAREN_FUNCTION_PARSERS = { 1016 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1017 "CASE": lambda self: self._parse_case(), 1018 "CONNECT_BY_ROOT": lambda self: self.expression( 1019 exp.ConnectByRoot, this=self._parse_column() 1020 ), 1021 "IF": lambda self: self._parse_if(), 1022 "NEXT": lambda self: self._parse_next_value_for(), 1023 } 1024 1025 INVALID_FUNC_NAME_TOKENS = { 1026 TokenType.IDENTIFIER, 1027 TokenType.STRING, 1028 } 1029 1030 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1031 1032 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1033 1034 FUNCTION_PARSERS = { 1035 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1036 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1037 "DECODE": lambda self: self._parse_decode(), 1038 "EXTRACT": lambda self: self._parse_extract(), 1039 "GAP_FILL": lambda self: self._parse_gap_fill(), 1040 "JSON_OBJECT": lambda self: self._parse_json_object(), 1041 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1042 "JSON_TABLE": lambda self: self._parse_json_table(), 1043 "MATCH": lambda self: self._parse_match_against(), 1044 "OPENJSON": lambda self: self._parse_open_json(), 1045 "POSITION": lambda self: self._parse_position(), 1046 "PREDICT": lambda self: self._parse_predict(), 1047 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 
1048 "STRING_AGG": lambda self: self._parse_string_agg(), 1049 "SUBSTRING": lambda self: self._parse_substring(), 1050 "TRIM": lambda self: self._parse_trim(), 1051 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1052 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1053 } 1054 1055 QUERY_MODIFIER_PARSERS = { 1056 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1057 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1058 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1059 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1060 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1061 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1062 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1063 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1064 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1065 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1066 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1067 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1068 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1069 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1070 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1071 TokenType.CLUSTER_BY: lambda self: ( 1072 "cluster", 1073 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1074 ), 1075 TokenType.DISTRIBUTE_BY: lambda self: ( 1076 "distribute", 1077 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1078 ), 1079 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1080 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1081 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1082 } 1083 1084 SET_PARSERS = { 1085 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1086 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1087 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1088 "TRANSACTION": lambda self: self._parse_set_transaction(), 1089 } 1090 1091 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1092 1093 TYPE_LITERAL_PARSERS = { 1094 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1095 } 1096 1097 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1098 1099 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1100 1101 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1102 1103 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1104 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1105 "ISOLATION": ( 1106 ("LEVEL", "REPEATABLE", "READ"), 1107 ("LEVEL", "READ", "COMMITTED"), 1108 ("LEVEL", "READ", "UNCOMITTED"), 1109 ("LEVEL", "SERIALIZABLE"), 1110 ), 1111 "READ": ("WRITE", "ONLY"), 1112 } 1113 1114 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1115 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1116 ) 1117 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1118 1119 CREATE_SEQUENCE: OPTIONS_TYPE = { 1120 "SCALE": ("EXTEND", "NOEXTEND"), 1121 "SHARD": ("EXTEND", "NOEXTEND"), 1122 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1123 **dict.fromkeys( 1124 ( 1125 "SESSION", 1126 "GLOBAL", 1127 "KEEP", 1128 "NOKEEP", 1129 
"ORDER", 1130 "NOORDER", 1131 "NOCACHE", 1132 "CYCLE", 1133 "NOCYCLE", 1134 "NOMINVALUE", 1135 "NOMAXVALUE", 1136 "NOSCALE", 1137 "NOSHARD", 1138 ), 1139 tuple(), 1140 ), 1141 } 1142 1143 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1144 1145 USABLES: OPTIONS_TYPE = dict.fromkeys( 1146 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1147 ) 1148 1149 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1150 1151 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1152 "TYPE": ("EVOLUTION",), 1153 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1154 } 1155 1156 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1157 "NOT": ("ENFORCED",), 1158 "MATCH": ( 1159 "FULL", 1160 "PARTIAL", 1161 "SIMPLE", 1162 ), 1163 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1164 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1165 } 1166 1167 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1168 1169 CLONE_KEYWORDS = {"CLONE", "COPY"} 1170 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1171 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1172 1173 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1174 1175 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1176 1177 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1178 1179 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1180 1181 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1182 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1183 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1184 1185 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1186 1187 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1188 1189 ADD_CONSTRAINT_TOKENS = { 1190 TokenType.CONSTRAINT, 1191 TokenType.FOREIGN_KEY, 1192 TokenType.INDEX, 1193 TokenType.KEY, 1194 TokenType.PRIMARY_KEY, 1195 TokenType.UNIQUE, 1196 } 1197 1198 DISTINCT_TOKENS = {TokenType.DISTINCT} 1199 1200 NULL_TOKENS = {TokenType.NULL} 1201 1202 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1203 1204 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1205 1206 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1207 1208 STRICT_CAST = True 1209 1210 PREFIXED_PIVOT_COLUMNS = False 1211 IDENTIFY_PIVOT_STRINGS = False 1212 1213 LOG_DEFAULTS_TO_LN = False 1214 1215 # Whether ADD is present for each column added by ALTER TABLE 1216 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1217 1218 # Whether the table sample clause expects CSV syntax 1219 TABLESAMPLE_CSV = False 1220 1221 # The default method used for table sampling 1222 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1223 1224 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1225 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1226 1227 # Whether the TRIM function expects the characters to trim as its first argument 1228 TRIM_PATTERN_FIRST = False 1229 1230 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1231 STRING_ALIASES = False 1232 1233 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1234 MODIFIERS_ATTACHED_TO_SET_OP = True 1235 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1236 1237 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1238 NO_PAREN_IF_COMMANDS = True 1239 1240 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1241 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1242 1243 # Whether the `:` operator is used to extract a value from a VARIANT column 1244 COLON_IS_VARIANT_EXTRACT = False 1245 1246 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1247 # If this is True and '(' is not found, the keyword will be treated as an identifier 1248 VALUES_FOLLOWED_BY_PAREN = True 1249 1250 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1251 SUPPORTS_IMPLICIT_UNNEST = False 1252 1253 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1254 INTERVAL_SPANS = True 1255 1256 # Whether a PARTITION clause can follow a table reference 1257 SUPPORTS_PARTITION_SELECTION = False 1258 1259 __slots__ = ( 1260 "error_level", 1261 "error_message_context", 1262 "max_errors", 1263 "dialect", 1264 "sql", 1265 "errors", 1266 "_tokens", 1267 "_index", 1268 "_curr", 1269 "_next", 1270 "_prev", 1271 "_prev_comments", 1272 ) 1273 1274 # Autofilled 1275 SHOW_TRIE: t.Dict = {} 1276 SET_TRIE: t.Dict = {} 1277 1278 def __init__( 1279 self, 1280 error_level: t.Optional[ErrorLevel] = None, 1281 error_message_context: int = 100, 1282 max_errors: int = 3, 1283 dialect: DialectType = None, 1284 ): 1285 from sqlglot.dialects import Dialect 1286 1287 self.error_level = error_level or ErrorLevel.IMMEDIATE 1288 self.error_message_context = error_message_context 1289 self.max_errors = max_errors 1290 self.dialect = Dialect.get_or_raise(dialect) 1291 self.reset() 1292 1293 def reset(self): 1294 self.sql = "" 1295 self.errors = [] 1296 self._tokens = [] 1297 self._index = 0 1298 self._curr = None 1299 self._next = None 1300 self._prev = None 1301 self._prev_comments = None 1302 1303 def parse( 1304 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1305 ) -> t.List[t.Optional[exp.Expression]]: 1306 """ 1307 Parses a list of tokens and returns a list of syntax trees, one tree 1308 per parsed SQL statement. 1309 1310 Args: 1311 raw_tokens: The list of tokens. 1312 sql: The original SQL string, used to produce helpful debug messages. 1313 1314 Returns: 1315 The list of the produced syntax trees. 1316 """ 1317 return self._parse( 1318 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1319 ) 1320 1321 def parse_into( 1322 self, 1323 expression_types: exp.IntoType, 1324 raw_tokens: t.List[Token], 1325 sql: t.Optional[str] = None, 1326 ) -> t.List[t.Optional[exp.Expression]]: 1327 """ 1328 Parses a list of tokens into a given Expression type. If a collection of Expression 1329 types is given instead, this method will try to parse the token list into each one 1330 of them, stopping at the first for which the parsing succeeds. 1331 1332 Args: 1333 expression_types: The expression type(s) to try and parse the token list into. 
1334 raw_tokens: The list of tokens. 1335 sql: The original SQL string, used to produce helpful debug messages. 1336 1337 Returns: 1338 The target Expression. 1339 """ 1340 errors = [] 1341 for expression_type in ensure_list(expression_types): 1342 parser = self.EXPRESSION_PARSERS.get(expression_type) 1343 if not parser: 1344 raise TypeError(f"No parser registered for {expression_type}") 1345 1346 try: 1347 return self._parse(parser, raw_tokens, sql) 1348 except ParseError as e: 1349 e.errors[0]["into_expression"] = expression_type 1350 errors.append(e) 1351 1352 raise ParseError( 1353 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1354 errors=merge_errors(errors), 1355 ) from errors[-1] 1356 1357 def _parse( 1358 self, 1359 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1360 raw_tokens: t.List[Token], 1361 sql: t.Optional[str] = None, 1362 ) -> t.List[t.Optional[exp.Expression]]: 1363 self.reset() 1364 self.sql = sql or "" 1365 1366 total = len(raw_tokens) 1367 chunks: t.List[t.List[Token]] = [[]] 1368 1369 for i, token in enumerate(raw_tokens): 1370 if token.token_type == TokenType.SEMICOLON: 1371 if token.comments: 1372 chunks.append([token]) 1373 1374 if i < total - 1: 1375 chunks.append([]) 1376 else: 1377 chunks[-1].append(token) 1378 1379 expressions = [] 1380 1381 for tokens in chunks: 1382 self._index = -1 1383 self._tokens = tokens 1384 self._advance() 1385 1386 expressions.append(parse_method(self)) 1387 1388 if self._index < len(self._tokens): 1389 self.raise_error("Invalid expression / Unexpected token") 1390 1391 self.check_errors() 1392 1393 return expressions 1394 1395 def check_errors(self) -> None: 1396 """Logs or raises any found errors, depending on the chosen error level setting.""" 1397 if self.error_level == ErrorLevel.WARN: 1398 for error in self.errors: 1399 logger.error(str(error)) 1400 elif self.error_level == ErrorLevel.RAISE and self.errors: 1401 raise ParseError( 1402 concat_messages(self.errors, self.max_errors), 1403 errors=merge_errors(self.errors), 1404 ) 1405 1406 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1407 """ 1408 Appends an error in the list of recorded errors or raises it, depending on the chosen 1409 error level setting. 1410 """ 1411 token = token or self._curr or self._prev or Token.string("") 1412 start = token.start 1413 end = token.end + 1 1414 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1415 highlight = self.sql[start:end] 1416 end_context = self.sql[end : end + self.error_message_context] 1417 1418 error = ParseError.new( 1419 f"{message}. Line {token.line}, Col: {token.col}.\n" 1420 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1421 description=message, 1422 line=token.line, 1423 col=token.col, 1424 start_context=start_context, 1425 highlight=highlight, 1426 end_context=end_context, 1427 ) 1428 1429 if self.error_level == ErrorLevel.IMMEDIATE: 1430 raise error 1431 1432 self.errors.append(error) 1433 1434 def expression( 1435 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1436 ) -> E: 1437 """ 1438 Creates a new, validated Expression. 1439 1440 Args: 1441 exp_class: The expression class to instantiate. 1442 comments: An optional list of comments to attach to the expression. 1443 kwargs: The arguments to set for the expression along with their respective values. 1444 1445 Returns: 1446 The target expression. 
1447 """ 1448 instance = exp_class(**kwargs) 1449 instance.add_comments(comments) if comments else self._add_comments(instance) 1450 return self.validate_expression(instance) 1451 1452 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1453 if expression and self._prev_comments: 1454 expression.add_comments(self._prev_comments) 1455 self._prev_comments = None 1456 1457 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1458 """ 1459 Validates an Expression, making sure that all its mandatory arguments are set. 1460 1461 Args: 1462 expression: The expression to validate. 1463 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1464 1465 Returns: 1466 The validated expression. 1467 """ 1468 if self.error_level != ErrorLevel.IGNORE: 1469 for error_message in expression.error_messages(args): 1470 self.raise_error(error_message) 1471 1472 return expression 1473 1474 def _find_sql(self, start: Token, end: Token) -> str: 1475 return self.sql[start.start : end.end + 1] 1476 1477 def _is_connected(self) -> bool: 1478 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1479 1480 def _advance(self, times: int = 1) -> None: 1481 self._index += times 1482 self._curr = seq_get(self._tokens, self._index) 1483 self._next = seq_get(self._tokens, self._index + 1) 1484 1485 if self._index > 0: 1486 self._prev = self._tokens[self._index - 1] 1487 self._prev_comments = self._prev.comments 1488 else: 1489 self._prev = None 1490 self._prev_comments = None 1491 1492 def _retreat(self, index: int) -> None: 1493 if index != self._index: 1494 self._advance(index - self._index) 1495 1496 def _warn_unsupported(self) -> None: 1497 if len(self._tokens) <= 1: 1498 return 1499 1500 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1501 # interested in emitting a warning for the one being currently processed. 1502 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1503 1504 logger.warning( 1505 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1506 ) 1507 1508 def _parse_command(self) -> exp.Command: 1509 self._warn_unsupported() 1510 return self.expression( 1511 exp.Command, 1512 comments=self._prev_comments, 1513 this=self._prev.text.upper(), 1514 expression=self._parse_string(), 1515 ) 1516 1517 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1518 """ 1519 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1520 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1521 solve this by setting & resetting the parser state accordingly 1522 """ 1523 index = self._index 1524 error_level = self.error_level 1525 1526 self.error_level = ErrorLevel.IMMEDIATE 1527 try: 1528 this = parse_method() 1529 except ParseError: 1530 this = None 1531 finally: 1532 if not this or retreat: 1533 self._retreat(index) 1534 self.error_level = error_level 1535 1536 return this 1537 1538 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1539 start = self._prev 1540 exists = self._parse_exists() if allow_exists else None 1541 1542 self._match(TokenType.ON) 1543 1544 materialized = self._match_text_seq("MATERIALIZED") 1545 kind = self._match_set(self.CREATABLES) and self._prev 1546 if not kind: 1547 return self._parse_as_command(start) 1548 1549 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1550 this = self._parse_user_defined_function(kind=kind.token_type) 1551 elif kind.token_type == TokenType.TABLE: 1552 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1553 elif kind.token_type == TokenType.COLUMN: 1554 this = self._parse_column() 1555 else: 1556 this = self._parse_id_var() 1557 1558 self._match(TokenType.IS) 1559 1560 return self.expression( 1561 exp.Comment, 1562 this=this, 1563 kind=kind.text, 1564 expression=self._parse_string(), 1565 exists=exists, 1566 materialized=materialized, 1567 ) 1568 1569 def _parse_to_table( 1570 self, 1571 ) -> exp.ToTableProperty: 1572 table = self._parse_table_parts(schema=True) 1573 return self.expression(exp.ToTableProperty, this=table) 1574 1575 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1576 def _parse_ttl(self) -> exp.Expression: 1577 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1578 this = self._parse_bitwise() 1579 1580 if self._match_text_seq("DELETE"): 1581 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1582 if self._match_text_seq("RECOMPRESS"): 1583 return self.expression( 1584 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1585 ) 1586 if self._match_text_seq("TO", "DISK"): 1587 return self.expression( 1588 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1589 ) 1590 if self._match_text_seq("TO", "VOLUME"): 1591 return self.expression( 1592 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1593 ) 1594 1595 return this 1596 1597 expressions = self._parse_csv(_parse_ttl_action) 1598 where = self._parse_where() 1599 group = self._parse_group() 1600 1601 aggregates = None 1602 if group and self._match(TokenType.SET): 1603 aggregates = self._parse_csv(self._parse_set_item) 1604 1605 return self.expression( 1606 exp.MergeTreeTTL, 1607 expressions=expressions, 1608 where=where, 1609 group=group, 1610 aggregates=aggregates, 1611 ) 1612 1613 def _parse_statement(self) -> t.Optional[exp.Expression]: 1614 if self._curr is None: 1615 return None 1616 1617 if self._match_set(self.STATEMENT_PARSERS): 1618 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1619 1620 if self._match_set(self.dialect.tokenizer.COMMANDS): 1621 return self._parse_command() 1622 1623 expression = self._parse_expression() 1624 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1625 return self._parse_query_modifiers(expression) 1626 1627 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1628 start = 
self._prev 1629 temporary = self._match(TokenType.TEMPORARY) 1630 materialized = self._match_text_seq("MATERIALIZED") 1631 1632 kind = self._match_set(self.CREATABLES) and self._prev.text 1633 if not kind: 1634 return self._parse_as_command(start) 1635 1636 if_exists = exists or self._parse_exists() 1637 table = self._parse_table_parts( 1638 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1639 ) 1640 1641 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1642 1643 if self._match(TokenType.L_PAREN, advance=False): 1644 expressions = self._parse_wrapped_csv(self._parse_types) 1645 else: 1646 expressions = None 1647 1648 return self.expression( 1649 exp.Drop, 1650 comments=start.comments, 1651 exists=if_exists, 1652 this=table, 1653 expressions=expressions, 1654 kind=kind.upper(), 1655 temporary=temporary, 1656 materialized=materialized, 1657 cascade=self._match_text_seq("CASCADE"), 1658 constraints=self._match_text_seq("CONSTRAINTS"), 1659 purge=self._match_text_seq("PURGE"), 1660 cluster=cluster, 1661 ) 1662 1663 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1664 return ( 1665 self._match_text_seq("IF") 1666 and (not not_ or self._match(TokenType.NOT)) 1667 and self._match(TokenType.EXISTS) 1668 ) 1669 1670 def _parse_create(self) -> exp.Create | exp.Command: 1671 # Note: this can't be None because we've matched a statement parser 1672 start = self._prev 1673 comments = self._prev_comments 1674 1675 replace = ( 1676 start.token_type == TokenType.REPLACE 1677 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1678 or self._match_pair(TokenType.OR, TokenType.ALTER) 1679 ) 1680 1681 unique = self._match(TokenType.UNIQUE) 1682 1683 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1684 clustered = True 1685 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1686 "COLUMNSTORE" 1687 ): 1688 clustered = False 1689 else: 1690 clustered = None 1691 1692 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1693 self._advance() 1694 1695 properties = None 1696 create_token = self._match_set(self.CREATABLES) and self._prev 1697 1698 if not create_token: 1699 # exp.Properties.Location.POST_CREATE 1700 properties = self._parse_properties() 1701 create_token = self._match_set(self.CREATABLES) and self._prev 1702 1703 if not properties or not create_token: 1704 return self._parse_as_command(start) 1705 1706 concurrently = self._match_text_seq("CONCURRENTLY") 1707 exists = self._parse_exists(not_=True) 1708 this = None 1709 expression: t.Optional[exp.Expression] = None 1710 indexes = None 1711 no_schema_binding = None 1712 begin = None 1713 end = None 1714 clone = None 1715 1716 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1717 nonlocal properties 1718 if properties and temp_props: 1719 properties.expressions.extend(temp_props.expressions) 1720 elif temp_props: 1721 properties = temp_props 1722 1723 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1724 this = self._parse_user_defined_function(kind=create_token.token_type) 1725 1726 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1727 extend_props(self._parse_properties()) 1728 1729 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1730 extend_props(self._parse_properties()) 1731 1732 if not expression: 1733 if self._match(TokenType.COMMAND): 1734 expression = self._parse_as_command(self._prev) 1735 else: 1736 begin = 
self._match(TokenType.BEGIN) 1737 return_ = self._match_text_seq("RETURN") 1738 1739 if self._match(TokenType.STRING, advance=False): 1740 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1741 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1742 expression = self._parse_string() 1743 extend_props(self._parse_properties()) 1744 else: 1745 expression = self._parse_statement() 1746 1747 end = self._match_text_seq("END") 1748 1749 if return_: 1750 expression = self.expression(exp.Return, this=expression) 1751 elif create_token.token_type == TokenType.INDEX: 1752 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1753 if not self._match(TokenType.ON): 1754 index = self._parse_id_var() 1755 anonymous = False 1756 else: 1757 index = None 1758 anonymous = True 1759 1760 this = self._parse_index(index=index, anonymous=anonymous) 1761 elif create_token.token_type in self.DB_CREATABLES: 1762 table_parts = self._parse_table_parts( 1763 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1764 ) 1765 1766 # exp.Properties.Location.POST_NAME 1767 self._match(TokenType.COMMA) 1768 extend_props(self._parse_properties(before=True)) 1769 1770 this = self._parse_schema(this=table_parts) 1771 1772 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1773 extend_props(self._parse_properties()) 1774 1775 self._match(TokenType.ALIAS) 1776 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1777 # exp.Properties.Location.POST_ALIAS 1778 extend_props(self._parse_properties()) 1779 1780 if create_token.token_type == TokenType.SEQUENCE: 1781 expression = self._parse_types() 1782 extend_props(self._parse_properties()) 1783 else: 1784 expression = self._parse_ddl_select() 1785 1786 if create_token.token_type == TokenType.TABLE: 1787 # exp.Properties.Location.POST_EXPRESSION 1788 extend_props(self._parse_properties()) 1789 1790 indexes = [] 1791 while True: 1792 index = self._parse_index() 1793 1794 # exp.Properties.Location.POST_INDEX 1795 extend_props(self._parse_properties()) 1796 1797 if not index: 1798 break 1799 else: 1800 self._match(TokenType.COMMA) 1801 indexes.append(index) 1802 elif create_token.token_type == TokenType.VIEW: 1803 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1804 no_schema_binding = True 1805 1806 shallow = self._match_text_seq("SHALLOW") 1807 1808 if self._match_texts(self.CLONE_KEYWORDS): 1809 copy = self._prev.text.lower() == "copy" 1810 clone = self.expression( 1811 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1812 ) 1813 1814 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1815 return self._parse_as_command(start) 1816 1817 return self.expression( 1818 exp.Create, 1819 comments=comments, 1820 this=this, 1821 kind=create_token.text.upper(), 1822 replace=replace, 1823 unique=unique, 1824 expression=expression, 1825 exists=exists, 1826 properties=properties, 1827 indexes=indexes, 1828 no_schema_binding=no_schema_binding, 1829 begin=begin, 1830 end=end, 1831 clone=clone, 1832 concurrently=concurrently, 1833 clustered=clustered, 1834 ) 1835 1836 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1837 seq = exp.SequenceProperties() 1838 1839 options = [] 1840 index = self._index 1841 1842 while self._curr: 1843 self._match(TokenType.COMMA) 1844 if self._match_text_seq("INCREMENT"): 1845 self._match_text_seq("BY") 1846 
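# NOTE (annotation, not in the upstream source): both keywords around this point are
# optional, so INCREMENT 2, INCREMENT BY 2 and INCREMENT = 2 all reach _parse_term();
# _match_text_seq consumes nothing when its text is absent. A hedged, doctest-style
# sketch of the observable behavior through the public API:
#
#     >>> import sqlglot
#     >>> create = sqlglot.parse_one("CREATE SEQUENCE s INCREMENT BY 2 MINVALUE 0")
#     >>> create.find(sqlglot.exp.SequenceProperties) is not None  # expected: True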
self._match_text_seq("=") 1847 seq.set("increment", self._parse_term()) 1848 elif self._match_text_seq("MINVALUE"): 1849 seq.set("minvalue", self._parse_term()) 1850 elif self._match_text_seq("MAXVALUE"): 1851 seq.set("maxvalue", self._parse_term()) 1852 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1853 self._match_text_seq("=") 1854 seq.set("start", self._parse_term()) 1855 elif self._match_text_seq("CACHE"): 1856 # T-SQL allows empty CACHE which is initialized dynamically 1857 seq.set("cache", self._parse_number() or True) 1858 elif self._match_text_seq("OWNED", "BY"): 1859 # "OWNED BY NONE" is the default 1860 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1861 else: 1862 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1863 if opt: 1864 options.append(opt) 1865 else: 1866 break 1867 1868 seq.set("options", options if options else None) 1869 return None if self._index == index else seq 1870 1871 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1872 # only used for teradata currently 1873 self._match(TokenType.COMMA) 1874 1875 kwargs = { 1876 "no": self._match_text_seq("NO"), 1877 "dual": self._match_text_seq("DUAL"), 1878 "before": self._match_text_seq("BEFORE"), 1879 "default": self._match_text_seq("DEFAULT"), 1880 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1881 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1882 "after": self._match_text_seq("AFTER"), 1883 "minimum": self._match_texts(("MIN", "MINIMUM")), 1884 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1885 } 1886 1887 if self._match_texts(self.PROPERTY_PARSERS): 1888 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1889 try: 1890 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1891 except TypeError: 1892 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1893 1894 return None 1895 1896 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1897 return self._parse_wrapped_csv(self._parse_property) 1898 1899 def _parse_property(self) -> t.Optional[exp.Expression]: 1900 if self._match_texts(self.PROPERTY_PARSERS): 1901 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1902 1903 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1904 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1905 1906 if self._match_text_seq("COMPOUND", "SORTKEY"): 1907 return self._parse_sortkey(compound=True) 1908 1909 if self._match_text_seq("SQL", "SECURITY"): 1910 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1911 1912 index = self._index 1913 key = self._parse_column() 1914 1915 if not self._match(TokenType.EQ): 1916 self._retreat(index) 1917 return self._parse_sequence_properties() 1918 1919 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1920 if isinstance(key, exp.Column): 1921 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1922 1923 value = self._parse_bitwise() or self._parse_var(any_token=True) 1924 1925 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1926 if isinstance(value, exp.Column): 1927 value = exp.var(value.name) 1928 1929 return self.expression(exp.Property, this=key, value=value) 1930 1931 def _parse_stored(self) -> exp.FileFormatProperty: 1932 self._match(TokenType.ALIAS) 1933 1934 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") 
else None 1935 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1936 1937 return self.expression( 1938 exp.FileFormatProperty, 1939 this=( 1940 self.expression( 1941 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1942 ) 1943 if input_format or output_format 1944 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1945 ), 1946 ) 1947 1948 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1949 field = self._parse_field() 1950 if isinstance(field, exp.Identifier) and not field.quoted: 1951 field = exp.var(field) 1952 1953 return field 1954 1955 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1956 self._match(TokenType.EQ) 1957 self._match(TokenType.ALIAS) 1958 1959 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1960 1961 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1962 properties = [] 1963 while True: 1964 if before: 1965 prop = self._parse_property_before() 1966 else: 1967 prop = self._parse_property() 1968 if not prop: 1969 break 1970 for p in ensure_list(prop): 1971 properties.append(p) 1972 1973 if properties: 1974 return self.expression(exp.Properties, expressions=properties) 1975 1976 return None 1977 1978 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1979 return self.expression( 1980 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1981 ) 1982 1983 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1984 if self._index >= 2: 1985 pre_volatile_token = self._tokens[self._index - 2] 1986 else: 1987 pre_volatile_token = None 1988 1989 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1990 return exp.VolatileProperty() 1991 1992 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1993 1994 def _parse_retention_period(self) -> exp.Var: 1995 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1996 number = self._parse_number() 1997 number_str = f"{number} " if number else "" 1998 unit = self._parse_var(any_token=True) 1999 return exp.var(f"{number_str}{unit}") 2000 2001 def _parse_system_versioning_property( 2002 self, with_: bool = False 2003 ) -> exp.WithSystemVersioningProperty: 2004 self._match(TokenType.EQ) 2005 prop = self.expression( 2006 exp.WithSystemVersioningProperty, 2007 **{ # type: ignore 2008 "on": True, 2009 "with": with_, 2010 }, 2011 ) 2012 2013 if self._match_text_seq("OFF"): 2014 prop.set("on", False) 2015 return prop 2016 2017 self._match(TokenType.ON) 2018 if self._match(TokenType.L_PAREN): 2019 while self._curr and not self._match(TokenType.R_PAREN): 2020 if self._match_text_seq("HISTORY_TABLE", "="): 2021 prop.set("this", self._parse_table_parts()) 2022 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2023 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2024 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2025 prop.set("retention_period", self._parse_retention_period()) 2026 2027 self._match(TokenType.COMMA) 2028 2029 return prop 2030 2031 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2032 self._match(TokenType.EQ) 2033 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2034 prop = self.expression(exp.DataDeletionProperty, on=on) 2035 2036 if self._match(TokenType.L_PAREN): 2037 
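# NOTE (annotation, not in the upstream source): the loop that follows consumes
# comma-separated KEY = value pairs until the closing paren, so an option list shaped
# like (FILTER_COLUMN = created_at, RETENTION_PERIOD = 30 DAYS) fills the filter_column
# and retention_period args of the DataDeletionProperty built above (the column name and
# period shown here are hypothetical).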
while self._curr and not self._match(TokenType.R_PAREN): 2038 if self._match_text_seq("FILTER_COLUMN", "="): 2039 prop.set("filter_column", self._parse_column()) 2040 elif self._match_text_seq("RETENTION_PERIOD", "="): 2041 prop.set("retention_period", self._parse_retention_period()) 2042 2043 self._match(TokenType.COMMA) 2044 2045 return prop 2046 2047 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2048 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2049 prop = self._parse_system_versioning_property(with_=True) 2050 self._match_r_paren() 2051 return prop 2052 2053 if self._match(TokenType.L_PAREN, advance=False): 2054 return self._parse_wrapped_properties() 2055 2056 if self._match_text_seq("JOURNAL"): 2057 return self._parse_withjournaltable() 2058 2059 if self._match_texts(self.VIEW_ATTRIBUTES): 2060 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2061 2062 if self._match_text_seq("DATA"): 2063 return self._parse_withdata(no=False) 2064 elif self._match_text_seq("NO", "DATA"): 2065 return self._parse_withdata(no=True) 2066 2067 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2068 return self._parse_serde_properties(with_=True) 2069 2070 if self._match(TokenType.SCHEMA): 2071 return self.expression( 2072 exp.WithSchemaBindingProperty, 2073 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2074 ) 2075 2076 if not self._next: 2077 return None 2078 2079 return self._parse_withisolatedloading() 2080 2081 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2082 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2083 self._match(TokenType.EQ) 2084 2085 user = self._parse_id_var() 2086 self._match(TokenType.PARAMETER) 2087 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2088 2089 if not user or not host: 2090 return None 2091 2092 return exp.DefinerProperty(this=f"{user}@{host}") 2093 2094 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2095 self._match(TokenType.TABLE) 2096 self._match(TokenType.EQ) 2097 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2098 2099 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2100 return self.expression(exp.LogProperty, no=no) 2101 2102 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2103 return self.expression(exp.JournalProperty, **kwargs) 2104 2105 def _parse_checksum(self) -> exp.ChecksumProperty: 2106 self._match(TokenType.EQ) 2107 2108 on = None 2109 if self._match(TokenType.ON): 2110 on = True 2111 elif self._match_text_seq("OFF"): 2112 on = False 2113 2114 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2115 2116 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2117 return self.expression( 2118 exp.Cluster, 2119 expressions=( 2120 self._parse_wrapped_csv(self._parse_ordered) 2121 if wrapped 2122 else self._parse_csv(self._parse_ordered) 2123 ), 2124 ) 2125 2126 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2127 self._match_text_seq("BY") 2128 2129 self._match_l_paren() 2130 expressions = self._parse_csv(self._parse_column) 2131 self._match_r_paren() 2132 2133 if self._match_text_seq("SORTED", "BY"): 2134 self._match_l_paren() 2135 sorted_by = self._parse_csv(self._parse_ordered) 2136 self._match_r_paren() 2137 else: 2138 sorted_by = None 2139 2140 self._match(TokenType.INTO) 2141 buckets = self._parse_number() 2142 self._match_text_seq("BUCKETS") 2143 2144 
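# NOTE (annotation, not in the upstream source): at this point a full Hive-style clause
# such as CLUSTERED BY (c1, c2) SORTED BY (c1 DESC) INTO 32 BUCKETS has been consumed;
# the expressions, sorted_by and buckets captured above feed the node built below. A
# hedged, doctest-style sketch:
#
#     >>> import sqlglot
#     >>> ddl = "CREATE TABLE t (c1 INT) CLUSTERED BY (c1) INTO 8 BUCKETS"
#     >>> sqlglot.parse_one(ddl, read="hive").find(sqlglot.exp.ClusteredByProperty) is not None  # expected: True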
return self.expression( 2145 exp.ClusteredByProperty, 2146 expressions=expressions, 2147 sorted_by=sorted_by, 2148 buckets=buckets, 2149 ) 2150 2151 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2152 if not self._match_text_seq("GRANTS"): 2153 self._retreat(self._index - 1) 2154 return None 2155 2156 return self.expression(exp.CopyGrantsProperty) 2157 2158 def _parse_freespace(self) -> exp.FreespaceProperty: 2159 self._match(TokenType.EQ) 2160 return self.expression( 2161 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2162 ) 2163 2164 def _parse_mergeblockratio( 2165 self, no: bool = False, default: bool = False 2166 ) -> exp.MergeBlockRatioProperty: 2167 if self._match(TokenType.EQ): 2168 return self.expression( 2169 exp.MergeBlockRatioProperty, 2170 this=self._parse_number(), 2171 percent=self._match(TokenType.PERCENT), 2172 ) 2173 2174 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2175 2176 def _parse_datablocksize( 2177 self, 2178 default: t.Optional[bool] = None, 2179 minimum: t.Optional[bool] = None, 2180 maximum: t.Optional[bool] = None, 2181 ) -> exp.DataBlocksizeProperty: 2182 self._match(TokenType.EQ) 2183 size = self._parse_number() 2184 2185 units = None 2186 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2187 units = self._prev.text 2188 2189 return self.expression( 2190 exp.DataBlocksizeProperty, 2191 size=size, 2192 units=units, 2193 default=default, 2194 minimum=minimum, 2195 maximum=maximum, 2196 ) 2197 2198 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2199 self._match(TokenType.EQ) 2200 always = self._match_text_seq("ALWAYS") 2201 manual = self._match_text_seq("MANUAL") 2202 never = self._match_text_seq("NEVER") 2203 default = self._match_text_seq("DEFAULT") 2204 2205 autotemp = None 2206 if self._match_text_seq("AUTOTEMP"): 2207 autotemp = self._parse_schema() 2208 2209 return self.expression( 2210 exp.BlockCompressionProperty, 2211 always=always, 2212 manual=manual, 2213 never=never, 2214 default=default, 2215 autotemp=autotemp, 2216 ) 2217 2218 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2219 index = self._index 2220 no = self._match_text_seq("NO") 2221 concurrent = self._match_text_seq("CONCURRENT") 2222 2223 if not self._match_text_seq("ISOLATED", "LOADING"): 2224 self._retreat(index) 2225 return None 2226 2227 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2228 return self.expression( 2229 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2230 ) 2231 2232 def _parse_locking(self) -> exp.LockingProperty: 2233 if self._match(TokenType.TABLE): 2234 kind = "TABLE" 2235 elif self._match(TokenType.VIEW): 2236 kind = "VIEW" 2237 elif self._match(TokenType.ROW): 2238 kind = "ROW" 2239 elif self._match_text_seq("DATABASE"): 2240 kind = "DATABASE" 2241 else: 2242 kind = None 2243 2244 if kind in ("DATABASE", "TABLE", "VIEW"): 2245 this = self._parse_table_parts() 2246 else: 2247 this = None 2248 2249 if self._match(TokenType.FOR): 2250 for_or_in = "FOR" 2251 elif self._match(TokenType.IN): 2252 for_or_in = "IN" 2253 else: 2254 for_or_in = None 2255 2256 if self._match_text_seq("ACCESS"): 2257 lock_type = "ACCESS" 2258 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2259 lock_type = "EXCLUSIVE" 2260 elif self._match_text_seq("SHARE"): 2261 lock_type = "SHARE" 2262 elif self._match_text_seq("READ"): 2263 lock_type = "READ" 2264 elif 
self._match_text_seq("WRITE"): 2265 lock_type = "WRITE" 2266 elif self._match_text_seq("CHECKSUM"): 2267 lock_type = "CHECKSUM" 2268 else: 2269 lock_type = None 2270 2271 override = self._match_text_seq("OVERRIDE") 2272 2273 return self.expression( 2274 exp.LockingProperty, 2275 this=this, 2276 kind=kind, 2277 for_or_in=for_or_in, 2278 lock_type=lock_type, 2279 override=override, 2280 ) 2281 2282 def _parse_partition_by(self) -> t.List[exp.Expression]: 2283 if self._match(TokenType.PARTITION_BY): 2284 return self._parse_csv(self._parse_assignment) 2285 return [] 2286 2287 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2288 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2289 if self._match_text_seq("MINVALUE"): 2290 return exp.var("MINVALUE") 2291 if self._match_text_seq("MAXVALUE"): 2292 return exp.var("MAXVALUE") 2293 return self._parse_bitwise() 2294 2295 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2296 expression = None 2297 from_expressions = None 2298 to_expressions = None 2299 2300 if self._match(TokenType.IN): 2301 this = self._parse_wrapped_csv(self._parse_bitwise) 2302 elif self._match(TokenType.FROM): 2303 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2304 self._match_text_seq("TO") 2305 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2306 elif self._match_text_seq("WITH", "(", "MODULUS"): 2307 this = self._parse_number() 2308 self._match_text_seq(",", "REMAINDER") 2309 expression = self._parse_number() 2310 self._match_r_paren() 2311 else: 2312 self.raise_error("Failed to parse partition bound spec.") 2313 2314 return self.expression( 2315 exp.PartitionBoundSpec, 2316 this=this, 2317 expression=expression, 2318 from_expressions=from_expressions, 2319 to_expressions=to_expressions, 2320 ) 2321 2322 # https://www.postgresql.org/docs/current/sql-createtable.html 2323 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2324 if not self._match_text_seq("OF"): 2325 self._retreat(self._index - 1) 2326 return None 2327 2328 this = self._parse_table(schema=True) 2329 2330 if self._match(TokenType.DEFAULT): 2331 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2332 elif self._match_text_seq("FOR", "VALUES"): 2333 expression = self._parse_partition_bound_spec() 2334 else: 2335 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2336 2337 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2338 2339 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2340 self._match(TokenType.EQ) 2341 return self.expression( 2342 exp.PartitionedByProperty, 2343 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2344 ) 2345 2346 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2347 if self._match_text_seq("AND", "STATISTICS"): 2348 statistics = True 2349 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2350 statistics = False 2351 else: 2352 statistics = None 2353 2354 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2355 2356 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2357 if self._match_text_seq("SQL"): 2358 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2359 return None 2360 2361 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2362 if self._match_text_seq("SQL", "DATA"): 2363 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2364 
return None 2365 2366 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2367 if self._match_text_seq("PRIMARY", "INDEX"): 2368 return exp.NoPrimaryIndexProperty() 2369 if self._match_text_seq("SQL"): 2370 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2371 return None 2372 2373 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2374 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2375 return exp.OnCommitProperty() 2376 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2377 return exp.OnCommitProperty(delete=True) 2378 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2379 2380 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2381 if self._match_text_seq("SQL", "DATA"): 2382 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2383 return None 2384 2385 def _parse_distkey(self) -> exp.DistKeyProperty: 2386 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2387 2388 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2389 table = self._parse_table(schema=True) 2390 2391 options = [] 2392 while self._match_texts(("INCLUDING", "EXCLUDING")): 2393 this = self._prev.text.upper() 2394 2395 id_var = self._parse_id_var() 2396 if not id_var: 2397 return None 2398 2399 options.append( 2400 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2401 ) 2402 2403 return self.expression(exp.LikeProperty, this=table, expressions=options) 2404 2405 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2406 return self.expression( 2407 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2408 ) 2409 2410 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2411 self._match(TokenType.EQ) 2412 return self.expression( 2413 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2414 ) 2415 2416 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2417 self._match_text_seq("WITH", "CONNECTION") 2418 return self.expression( 2419 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2420 ) 2421 2422 def _parse_returns(self) -> exp.ReturnsProperty: 2423 value: t.Optional[exp.Expression] 2424 null = None 2425 is_table = self._match(TokenType.TABLE) 2426 2427 if is_table: 2428 if self._match(TokenType.LT): 2429 value = self.expression( 2430 exp.Schema, 2431 this="TABLE", 2432 expressions=self._parse_csv(self._parse_struct_types), 2433 ) 2434 if not self._match(TokenType.GT): 2435 self.raise_error("Expecting >") 2436 else: 2437 value = self._parse_schema(exp.var("TABLE")) 2438 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2439 null = True 2440 value = None 2441 else: 2442 value = self._parse_types() 2443 2444 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2445 2446 def _parse_describe(self) -> exp.Describe: 2447 kind = self._match_set(self.CREATABLES) and self._prev.text 2448 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2449 if self._match(TokenType.DOT): 2450 style = None 2451 self._retreat(self._index - 2) 2452 this = self._parse_table(schema=True) 2453 properties = self._parse_properties() 2454 expressions = properties.expressions if properties else None 2455 return self.expression( 2456 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2457 ) 2458 2459 def 
_parse_insert(self) -> exp.Insert: 2460 comments = ensure_list(self._prev_comments) 2461 hint = self._parse_hint() 2462 overwrite = self._match(TokenType.OVERWRITE) 2463 ignore = self._match(TokenType.IGNORE) 2464 local = self._match_text_seq("LOCAL") 2465 alternative = None 2466 is_function = None 2467 2468 if self._match_text_seq("DIRECTORY"): 2469 this: t.Optional[exp.Expression] = self.expression( 2470 exp.Directory, 2471 this=self._parse_var_or_string(), 2472 local=local, 2473 row_format=self._parse_row_format(match_row=True), 2474 ) 2475 else: 2476 if self._match(TokenType.OR): 2477 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2478 2479 self._match(TokenType.INTO) 2480 comments += ensure_list(self._prev_comments) 2481 self._match(TokenType.TABLE) 2482 is_function = self._match(TokenType.FUNCTION) 2483 2484 this = ( 2485 self._parse_table(schema=True, parse_partition=True) 2486 if not is_function 2487 else self._parse_function() 2488 ) 2489 2490 returning = self._parse_returning() 2491 2492 return self.expression( 2493 exp.Insert, 2494 comments=comments, 2495 hint=hint, 2496 is_function=is_function, 2497 this=this, 2498 stored=self._match_text_seq("STORED") and self._parse_stored(), 2499 by_name=self._match_text_seq("BY", "NAME"), 2500 exists=self._parse_exists(), 2501 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2502 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2503 conflict=self._parse_on_conflict(), 2504 returning=returning or self._parse_returning(), 2505 overwrite=overwrite, 2506 alternative=alternative, 2507 ignore=ignore, 2508 ) 2509 2510 def _parse_kill(self) -> exp.Kill: 2511 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2512 2513 return self.expression( 2514 exp.Kill, 2515 this=self._parse_primary(), 2516 kind=kind, 2517 ) 2518 2519 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2520 conflict = self._match_text_seq("ON", "CONFLICT") 2521 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2522 2523 if not conflict and not duplicate: 2524 return None 2525 2526 conflict_keys = None 2527 constraint = None 2528 2529 if conflict: 2530 if self._match_text_seq("ON", "CONSTRAINT"): 2531 constraint = self._parse_id_var() 2532 elif self._match(TokenType.L_PAREN): 2533 conflict_keys = self._parse_csv(self._parse_id_var) 2534 self._match_r_paren() 2535 2536 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2537 if self._prev.token_type == TokenType.UPDATE: 2538 self._match(TokenType.SET) 2539 expressions = self._parse_csv(self._parse_equality) 2540 else: 2541 expressions = None 2542 2543 return self.expression( 2544 exp.OnConflict, 2545 duplicate=duplicate, 2546 expressions=expressions, 2547 action=action, 2548 conflict_keys=conflict_keys, 2549 constraint=constraint, 2550 ) 2551 2552 def _parse_returning(self) -> t.Optional[exp.Returning]: 2553 if not self._match(TokenType.RETURNING): 2554 return None 2555 return self.expression( 2556 exp.Returning, 2557 expressions=self._parse_csv(self._parse_expression), 2558 into=self._match(TokenType.INTO) and self._parse_table_part(), 2559 ) 2560 2561 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2562 if not self._match(TokenType.FORMAT): 2563 return None 2564 return self._parse_row_format() 2565 2566 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2567 index = self._index 2568 
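# NOTE (annotation, not in the upstream source): saving self._index and retreating to it
# when a required keyword is missing is the backtracking idiom used throughout this
# parser; the caller then sees an untouched token stream. A minimal sketch of the idiom,
# mirroring the SERDE_PROPERTIES check right below:
#
#     saved = self._index
#     if not self._match(TokenType.SERDE_PROPERTIES):  # required keyword absent
#         self._retreat(saved)                         # undo any lookahead
#         return None                                  # signal "no match" to the caller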
with_ = with_ or self._match_text_seq("WITH") 2569 2570 if not self._match(TokenType.SERDE_PROPERTIES): 2571 self._retreat(index) 2572 return None 2573 return self.expression( 2574 exp.SerdeProperties, 2575 **{ # type: ignore 2576 "expressions": self._parse_wrapped_properties(), 2577 "with": with_, 2578 }, 2579 ) 2580 2581 def _parse_row_format( 2582 self, match_row: bool = False 2583 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2584 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2585 return None 2586 2587 if self._match_text_seq("SERDE"): 2588 this = self._parse_string() 2589 2590 serde_properties = self._parse_serde_properties() 2591 2592 return self.expression( 2593 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2594 ) 2595 2596 self._match_text_seq("DELIMITED") 2597 2598 kwargs = {} 2599 2600 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2601 kwargs["fields"] = self._parse_string() 2602 if self._match_text_seq("ESCAPED", "BY"): 2603 kwargs["escaped"] = self._parse_string() 2604 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2605 kwargs["collection_items"] = self._parse_string() 2606 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2607 kwargs["map_keys"] = self._parse_string() 2608 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2609 kwargs["lines"] = self._parse_string() 2610 if self._match_text_seq("NULL", "DEFINED", "AS"): 2611 kwargs["null"] = self._parse_string() 2612 2613 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2614 2615 def _parse_load(self) -> exp.LoadData | exp.Command: 2616 if self._match_text_seq("DATA"): 2617 local = self._match_text_seq("LOCAL") 2618 self._match_text_seq("INPATH") 2619 inpath = self._parse_string() 2620 overwrite = self._match(TokenType.OVERWRITE) 2621 self._match_pair(TokenType.INTO, TokenType.TABLE) 2622 2623 return self.expression( 2624 exp.LoadData, 2625 this=self._parse_table(schema=True), 2626 local=local, 2627 overwrite=overwrite, 2628 inpath=inpath, 2629 partition=self._parse_partition(), 2630 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2631 serde=self._match_text_seq("SERDE") and self._parse_string(), 2632 ) 2633 return self._parse_as_command(self._prev) 2634 2635 def _parse_delete(self) -> exp.Delete: 2636 # This handles MySQL's "Multiple-Table Syntax" 2637 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2638 tables = None 2639 comments = self._prev_comments 2640 if not self._match(TokenType.FROM, advance=False): 2641 tables = self._parse_csv(self._parse_table) or None 2642 2643 returning = self._parse_returning() 2644 2645 return self.expression( 2646 exp.Delete, 2647 comments=comments, 2648 tables=tables, 2649 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2650 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2651 where=self._parse_where(), 2652 returning=returning or self._parse_returning(), 2653 limit=self._parse_limit(), 2654 ) 2655 2656 def _parse_update(self) -> exp.Update: 2657 comments = self._prev_comments 2658 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2659 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2660 returning = self._parse_returning() 2661 return self.expression( 2662 exp.Update, 2663 comments=comments, 2664 **{ # type: ignore 2665 "this": this, 2666 "expressions": expressions, 2667 "from": 
self._parse_from(joins=True), 2668 "where": self._parse_where(), 2669 "returning": returning or self._parse_returning(), 2670 "order": self._parse_order(), 2671 "limit": self._parse_limit(), 2672 }, 2673 ) 2674 2675 def _parse_uncache(self) -> exp.Uncache: 2676 if not self._match(TokenType.TABLE): 2677 self.raise_error("Expecting TABLE after UNCACHE") 2678 2679 return self.expression( 2680 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2681 ) 2682 2683 def _parse_cache(self) -> exp.Cache: 2684 lazy = self._match_text_seq("LAZY") 2685 self._match(TokenType.TABLE) 2686 table = self._parse_table(schema=True) 2687 2688 options = [] 2689 if self._match_text_seq("OPTIONS"): 2690 self._match_l_paren() 2691 k = self._parse_string() 2692 self._match(TokenType.EQ) 2693 v = self._parse_string() 2694 options = [k, v] 2695 self._match_r_paren() 2696 2697 self._match(TokenType.ALIAS) 2698 return self.expression( 2699 exp.Cache, 2700 this=table, 2701 lazy=lazy, 2702 options=options, 2703 expression=self._parse_select(nested=True), 2704 ) 2705 2706 def _parse_partition(self) -> t.Optional[exp.Partition]: 2707 if not self._match(TokenType.PARTITION): 2708 return None 2709 2710 return self.expression( 2711 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2712 ) 2713 2714 def _parse_value(self) -> t.Optional[exp.Tuple]: 2715 if self._match(TokenType.L_PAREN): 2716 expressions = self._parse_csv(self._parse_expression) 2717 self._match_r_paren() 2718 return self.expression(exp.Tuple, expressions=expressions) 2719 2720 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2721 expression = self._parse_expression() 2722 if expression: 2723 return self.expression(exp.Tuple, expressions=[expression]) 2724 return None 2725 2726 def _parse_projections(self) -> t.List[exp.Expression]: 2727 return self._parse_expressions() 2728 2729 def _parse_select( 2730 self, 2731 nested: bool = False, 2732 table: bool = False, 2733 parse_subquery_alias: bool = True, 2734 parse_set_operation: bool = True, 2735 ) -> t.Optional[exp.Expression]: 2736 cte = self._parse_with() 2737 2738 if cte: 2739 this = self._parse_statement() 2740 2741 if not this: 2742 self.raise_error("Failed to parse any statement following CTE") 2743 return cte 2744 2745 if "with" in this.arg_types: 2746 this.set("with", cte) 2747 else: 2748 self.raise_error(f"{this.key} does not support CTE") 2749 this = cte 2750 2751 return this 2752 2753 # duckdb supports leading with FROM x 2754 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2755 2756 if self._match(TokenType.SELECT): 2757 comments = self._prev_comments 2758 2759 hint = self._parse_hint() 2760 2761 if self._next and not self._next.token_type == TokenType.DOT: 2762 all_ = self._match(TokenType.ALL) 2763 distinct = self._match_set(self.DISTINCT_TOKENS) 2764 else: 2765 all_, distinct = None, None 2766 2767 kind = ( 2768 self._match(TokenType.ALIAS) 2769 and self._match_texts(("STRUCT", "VALUE")) 2770 and self._prev.text.upper() 2771 ) 2772 2773 if distinct: 2774 distinct = self.expression( 2775 exp.Distinct, 2776 on=self._parse_value() if self._match(TokenType.ON) else None, 2777 ) 2778 2779 if all_ and distinct: 2780 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2781 2782 limit = self._parse_limit(top=True) 2783 projections = self._parse_projections() 2784 2785 this = self.expression( 2786 exp.Select, 2787 kind=kind, 2788 hint=hint, 2789 distinct=distinct, 2790 
expressions=projections, 2791 limit=limit, 2792 ) 2793 this.comments = comments 2794 2795 into = self._parse_into() 2796 if into: 2797 this.set("into", into) 2798 2799 if not from_: 2800 from_ = self._parse_from() 2801 2802 if from_: 2803 this.set("from", from_) 2804 2805 this = self._parse_query_modifiers(this) 2806 elif (table or nested) and self._match(TokenType.L_PAREN): 2807 if self._match(TokenType.PIVOT): 2808 this = self._parse_simplified_pivot() 2809 elif self._match(TokenType.FROM): 2810 this = exp.select("*").from_( 2811 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2812 ) 2813 else: 2814 this = ( 2815 self._parse_table() 2816 if table 2817 else self._parse_select(nested=True, parse_set_operation=False) 2818 ) 2819 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2820 2821 self._match_r_paren() 2822 2823 # We return early here so that the UNION isn't attached to the subquery by the 2824 # following call to _parse_set_operations, but instead becomes the parent node 2825 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2826 elif self._match(TokenType.VALUES, advance=False): 2827 this = self._parse_derived_table_values() 2828 elif from_: 2829 this = exp.select("*").from_(from_.this, copy=False) 2830 elif self._match(TokenType.SUMMARIZE): 2831 table = self._match(TokenType.TABLE) 2832 this = self._parse_select() or self._parse_string() or self._parse_table() 2833 return self.expression(exp.Summarize, this=this, table=table) 2834 else: 2835 this = None 2836 2837 return self._parse_set_operations(this) if parse_set_operation else this 2838 2839 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2840 if not skip_with_token and not self._match(TokenType.WITH): 2841 return None 2842 2843 comments = self._prev_comments 2844 recursive = self._match(TokenType.RECURSIVE) 2845 2846 expressions = [] 2847 while True: 2848 expressions.append(self._parse_cte()) 2849 2850 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2851 break 2852 else: 2853 self._match(TokenType.WITH) 2854 2855 return self.expression( 2856 exp.With, comments=comments, expressions=expressions, recursive=recursive 2857 ) 2858 2859 def _parse_cte(self) -> exp.CTE: 2860 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2861 if not alias or not alias.this: 2862 self.raise_error("Expected CTE to have alias") 2863 2864 self._match(TokenType.ALIAS) 2865 comments = self._prev_comments 2866 2867 if self._match_text_seq("NOT", "MATERIALIZED"): 2868 materialized = False 2869 elif self._match_text_seq("MATERIALIZED"): 2870 materialized = True 2871 else: 2872 materialized = None 2873 2874 return self.expression( 2875 exp.CTE, 2876 this=self._parse_wrapped(self._parse_statement), 2877 alias=alias, 2878 materialized=materialized, 2879 comments=comments, 2880 ) 2881 2882 def _parse_table_alias( 2883 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2884 ) -> t.Optional[exp.TableAlias]: 2885 any_token = self._match(TokenType.ALIAS) 2886 alias = ( 2887 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2888 or self._parse_string_as_identifier() 2889 ) 2890 2891 index = self._index 2892 if self._match(TokenType.L_PAREN): 2893 columns = self._parse_csv(self._parse_function_parameter) 2894 self._match_r_paren() if columns else self._retreat(index) 2895 else: 2896 columns = None 2897 2898 if not alias and not columns: 2899 return None 2900 2901 table_alias = self.expression(exp.TableAlias, 
this=alias, columns=columns) 2902 2903 # We bubble up comments from the Identifier to the TableAlias 2904 if isinstance(alias, exp.Identifier): 2905 table_alias.add_comments(alias.pop_comments()) 2906 2907 return table_alias 2908 2909 def _parse_subquery( 2910 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2911 ) -> t.Optional[exp.Subquery]: 2912 if not this: 2913 return None 2914 2915 return self.expression( 2916 exp.Subquery, 2917 this=this, 2918 pivots=self._parse_pivots(), 2919 alias=self._parse_table_alias() if parse_alias else None, 2920 ) 2921 2922 def _implicit_unnests_to_explicit(self, this: E) -> E: 2923 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2924 2925 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2926 for i, join in enumerate(this.args.get("joins") or []): 2927 table = join.this 2928 normalized_table = table.copy() 2929 normalized_table.meta["maybe_column"] = True 2930 normalized_table = _norm(normalized_table, dialect=self.dialect) 2931 2932 if isinstance(table, exp.Table) and not join.args.get("on"): 2933 if normalized_table.parts[0].name in refs: 2934 table_as_column = table.to_column() 2935 unnest = exp.Unnest(expressions=[table_as_column]) 2936 2937 # Table.to_column creates a parent Alias node that we want to convert to 2938 # a TableAlias and attach to the Unnest, so it matches the parser's output 2939 if isinstance(table.args.get("alias"), exp.TableAlias): 2940 table_as_column.replace(table_as_column.this) 2941 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2942 2943 table.replace(unnest) 2944 2945 refs.add(normalized_table.alias_or_name) 2946 2947 return this 2948 2949 def _parse_query_modifiers( 2950 self, this: t.Optional[exp.Expression] 2951 ) -> t.Optional[exp.Expression]: 2952 if isinstance(this, (exp.Query, exp.Table)): 2953 for join in self._parse_joins(): 2954 this.append("joins", join) 2955 for lateral in iter(self._parse_lateral, None): 2956 this.append("laterals", lateral) 2957 2958 while True: 2959 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2960 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2961 key, expression = parser(self) 2962 2963 if expression: 2964 this.set(key, expression) 2965 if key == "limit": 2966 offset = expression.args.pop("offset", None) 2967 2968 if offset: 2969 offset = exp.Offset(expression=offset) 2970 this.set("offset", offset) 2971 2972 limit_by_expressions = expression.expressions 2973 expression.set("expressions", None) 2974 offset.set("expressions", limit_by_expressions) 2975 continue 2976 break 2977 2978 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 2979 this = self._implicit_unnests_to_explicit(this) 2980 2981 return this 2982 2983 def _parse_hint(self) -> t.Optional[exp.Hint]: 2984 if self._match(TokenType.HINT): 2985 hints = [] 2986 for hint in iter( 2987 lambda: self._parse_csv( 2988 lambda: self._parse_function() or self._parse_var(upper=True) 2989 ), 2990 [], 2991 ): 2992 hints.extend(hint) 2993 2994 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2995 self.raise_error("Expected */ after HINT") 2996 2997 return self.expression(exp.Hint, expressions=hints) 2998 2999 return None 3000 3001 def _parse_into(self) -> t.Optional[exp.Into]: 3002 if not self._match(TokenType.INTO): 3003 return None 3004 3005 temp = self._match(TokenType.TEMPORARY) 3006 unlogged = self._match_text_seq("UNLOGGED") 3007 self._match(TokenType.TABLE) 3008 3009 return 
self.expression( 3010 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3011 ) 3012 3013 def _parse_from( 3014 self, joins: bool = False, skip_from_token: bool = False 3015 ) -> t.Optional[exp.From]: 3016 if not skip_from_token and not self._match(TokenType.FROM): 3017 return None 3018 3019 return self.expression( 3020 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3021 ) 3022 3023 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3024 return self.expression( 3025 exp.MatchRecognizeMeasure, 3026 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3027 this=self._parse_expression(), 3028 ) 3029 3030 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3031 if not self._match(TokenType.MATCH_RECOGNIZE): 3032 return None 3033 3034 self._match_l_paren() 3035 3036 partition = self._parse_partition_by() 3037 order = self._parse_order() 3038 3039 measures = ( 3040 self._parse_csv(self._parse_match_recognize_measure) 3041 if self._match_text_seq("MEASURES") 3042 else None 3043 ) 3044 3045 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3046 rows = exp.var("ONE ROW PER MATCH") 3047 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3048 text = "ALL ROWS PER MATCH" 3049 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3050 text += " SHOW EMPTY MATCHES" 3051 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3052 text += " OMIT EMPTY MATCHES" 3053 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3054 text += " WITH UNMATCHED ROWS" 3055 rows = exp.var(text) 3056 else: 3057 rows = None 3058 3059 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3060 text = "AFTER MATCH SKIP" 3061 if self._match_text_seq("PAST", "LAST", "ROW"): 3062 text += " PAST LAST ROW" 3063 elif self._match_text_seq("TO", "NEXT", "ROW"): 3064 text += " TO NEXT ROW" 3065 elif self._match_text_seq("TO", "FIRST"): 3066 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3067 elif self._match_text_seq("TO", "LAST"): 3068 text += f" TO LAST {self._advance_any().text}" # type: ignore 3069 after = exp.var(text) 3070 else: 3071 after = None 3072 3073 if self._match_text_seq("PATTERN"): 3074 self._match_l_paren() 3075 3076 if not self._curr: 3077 self.raise_error("Expecting )", self._curr) 3078 3079 paren = 1 3080 start = self._curr 3081 3082 while self._curr and paren > 0: 3083 if self._curr.token_type == TokenType.L_PAREN: 3084 paren += 1 3085 if self._curr.token_type == TokenType.R_PAREN: 3086 paren -= 1 3087 3088 end = self._prev 3089 self._advance() 3090 3091 if paren > 0: 3092 self.raise_error("Expecting )", self._curr) 3093 3094 pattern = exp.var(self._find_sql(start, end)) 3095 else: 3096 pattern = None 3097 3098 define = ( 3099 self._parse_csv(self._parse_name_as_expression) 3100 if self._match_text_seq("DEFINE") 3101 else None 3102 ) 3103 3104 self._match_r_paren() 3105 3106 return self.expression( 3107 exp.MatchRecognize, 3108 partition_by=partition, 3109 order=order, 3110 measures=measures, 3111 rows=rows, 3112 after=after, 3113 pattern=pattern, 3114 define=define, 3115 alias=self._parse_table_alias(), 3116 ) 3117 3118 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3119 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3120 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3121 cross_apply = False 3122 3123 if cross_apply is not None: 3124 this = self._parse_select(table=True) 3125 view = None 3126 outer = None 3127 
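# NOTE (annotation, not in the upstream source): T-SQL's CROSS APPLY and OUTER APPLY are
# normalized into the same exp.Lateral node as ANSI LATERAL: cross_apply is True for
# CROSS APPLY, False for OUTER APPLY, and None when the LATERAL branch below is taken.
# A hedged, doctest-style sketch:
#
#     >>> import sqlglot
#     >>> sql = "SELECT * FROM t CROSS APPLY (SELECT * FROM u) AS x"
#     >>> sqlglot.parse_one(sql, read="tsql").find(sqlglot.exp.Lateral) is not None  # expected: True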
elif self._match(TokenType.LATERAL): 3128 this = self._parse_select(table=True) 3129 view = self._match(TokenType.VIEW) 3130 outer = self._match(TokenType.OUTER) 3131 else: 3132 return None 3133 3134 if not this: 3135 this = ( 3136 self._parse_unnest() 3137 or self._parse_function() 3138 or self._parse_id_var(any_token=False) 3139 ) 3140 3141 while self._match(TokenType.DOT): 3142 this = exp.Dot( 3143 this=this, 3144 expression=self._parse_function() or self._parse_id_var(any_token=False), 3145 ) 3146 3147 if view: 3148 table = self._parse_id_var(any_token=False) 3149 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3150 table_alias: t.Optional[exp.TableAlias] = self.expression( 3151 exp.TableAlias, this=table, columns=columns 3152 ) 3153 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3154 # We move the alias from the lateral's child node to the lateral itself 3155 table_alias = this.args["alias"].pop() 3156 else: 3157 table_alias = self._parse_table_alias() 3158 3159 return self.expression( 3160 exp.Lateral, 3161 this=this, 3162 view=view, 3163 outer=outer, 3164 alias=table_alias, 3165 cross_apply=cross_apply, 3166 ) 3167 3168 def _parse_join_parts( 3169 self, 3170 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3171 return ( 3172 self._match_set(self.JOIN_METHODS) and self._prev, 3173 self._match_set(self.JOIN_SIDES) and self._prev, 3174 self._match_set(self.JOIN_KINDS) and self._prev, 3175 ) 3176 3177 def _parse_join( 3178 self, skip_join_token: bool = False, parse_bracket: bool = False 3179 ) -> t.Optional[exp.Join]: 3180 if self._match(TokenType.COMMA): 3181 return self.expression(exp.Join, this=self._parse_table()) 3182 3183 index = self._index 3184 method, side, kind = self._parse_join_parts() 3185 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3186 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3187 3188 if not skip_join_token and not join: 3189 self._retreat(index) 3190 kind = None 3191 method = None 3192 side = None 3193 3194 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3195 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3196 3197 if not skip_join_token and not join and not outer_apply and not cross_apply: 3198 return None 3199 3200 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3201 3202 if method: 3203 kwargs["method"] = method.text 3204 if side: 3205 kwargs["side"] = side.text 3206 if kind: 3207 kwargs["kind"] = kind.text 3208 if hint: 3209 kwargs["hint"] = hint 3210 3211 if self._match(TokenType.MATCH_CONDITION): 3212 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3213 3214 if self._match(TokenType.ON): 3215 kwargs["on"] = self._parse_assignment() 3216 elif self._match(TokenType.USING): 3217 kwargs["using"] = self._parse_wrapped_id_vars() 3218 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3219 kind and kind.token_type == TokenType.CROSS 3220 ): 3221 index = self._index 3222 joins: t.Optional[list] = list(self._parse_joins()) 3223 3224 if joins and self._match(TokenType.ON): 3225 kwargs["on"] = self._parse_assignment() 3226 elif joins and self._match(TokenType.USING): 3227 kwargs["using"] = self._parse_wrapped_id_vars() 3228 else: 3229 joins = None 3230 self._retreat(index) 3231 3232 kwargs["this"].set("joins", joins if joins else None) 3233 3234 comments = [c for token in (method, side, kind) if token for 
c in token.comments] 3235 return self.expression(exp.Join, comments=comments, **kwargs) 3236 3237 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3238 this = self._parse_assignment() 3239 3240 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3241 return this 3242 3243 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3244 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3245 3246 return this 3247 3248 def _parse_index_params(self) -> exp.IndexParameters: 3249 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3250 3251 if self._match(TokenType.L_PAREN, advance=False): 3252 columns = self._parse_wrapped_csv(self._parse_with_operator) 3253 else: 3254 columns = None 3255 3256 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3257 partition_by = self._parse_partition_by() 3258 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3259 tablespace = ( 3260 self._parse_var(any_token=True) 3261 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3262 else None 3263 ) 3264 where = self._parse_where() 3265 3266 on = self._parse_field() if self._match(TokenType.ON) else None 3267 3268 return self.expression( 3269 exp.IndexParameters, 3270 using=using, 3271 columns=columns, 3272 include=include, 3273 partition_by=partition_by, 3274 where=where, 3275 with_storage=with_storage, 3276 tablespace=tablespace, 3277 on=on, 3278 ) 3279 3280 def _parse_index( 3281 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3282 ) -> t.Optional[exp.Index]: 3283 if index or anonymous: 3284 unique = None 3285 primary = None 3286 amp = None 3287 3288 self._match(TokenType.ON) 3289 self._match(TokenType.TABLE) # hive 3290 table = self._parse_table_parts(schema=True) 3291 else: 3292 unique = self._match(TokenType.UNIQUE) 3293 primary = self._match_text_seq("PRIMARY") 3294 amp = self._match_text_seq("AMP") 3295 3296 if not self._match(TokenType.INDEX): 3297 return None 3298 3299 index = self._parse_id_var() 3300 table = None 3301 3302 params = self._parse_index_params() 3303 3304 return self.expression( 3305 exp.Index, 3306 this=index, 3307 table=table, 3308 unique=unique, 3309 primary=primary, 3310 amp=amp, 3311 params=params, 3312 ) 3313 3314 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3315 hints: t.List[exp.Expression] = [] 3316 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3317 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3318 hints.append( 3319 self.expression( 3320 exp.WithTableHint, 3321 expressions=self._parse_csv( 3322 lambda: self._parse_function() or self._parse_var(any_token=True) 3323 ), 3324 ) 3325 ) 3326 self._match_r_paren() 3327 else: 3328 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3329 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3330 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3331 3332 self._match_set((TokenType.INDEX, TokenType.KEY)) 3333 if self._match(TokenType.FOR): 3334 hint.set("target", self._advance_any() and self._prev.text.upper()) 3335 3336 hint.set("expressions", self._parse_wrapped_id_vars()) 3337 hints.append(hint) 3338 3339 return hints or None 3340 3341 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3342 return ( 3343 (not schema and self._parse_function(optional_parens=False)) 3344 or self._parse_id_var(any_token=False) 3345 or 
self._parse_string_as_identifier() 3346 or self._parse_placeholder() 3347 ) 3348 3349 def _parse_table_parts( 3350 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3351 ) -> exp.Table: 3352 catalog = None 3353 db = None 3354 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3355 3356 while self._match(TokenType.DOT): 3357 if catalog: 3358 # This allows nesting the table in arbitrarily many dot expressions if needed 3359 table = self.expression( 3360 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3361 ) 3362 else: 3363 catalog = db 3364 db = table 3365 # "" used for tsql FROM a..b case 3366 table = self._parse_table_part(schema=schema) or "" 3367 3368 if ( 3369 wildcard 3370 and self._is_connected() 3371 and (isinstance(table, exp.Identifier) or not table) 3372 and self._match(TokenType.STAR) 3373 ): 3374 if isinstance(table, exp.Identifier): 3375 table.args["this"] += "*" 3376 else: 3377 table = exp.Identifier(this="*") 3378 3379 # We bubble up comments from the Identifier to the Table 3380 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3381 3382 if is_db_reference: 3383 catalog = db 3384 db = table 3385 table = None 3386 3387 if not table and not is_db_reference: 3388 self.raise_error(f"Expected table name but got {self._curr}") 3389 if not db and is_db_reference: 3390 self.raise_error(f"Expected database name but got {self._curr}") 3391 3392 table = self.expression( 3393 exp.Table, 3394 comments=comments, 3395 this=table, 3396 db=db, 3397 catalog=catalog, 3398 ) 3399 3400 changes = self._parse_changes() 3401 if changes: 3402 table.set("changes", changes) 3403 3404 at_before = self._parse_historical_data() 3405 if at_before: 3406 table.set("when", at_before) 3407 3408 pivots = self._parse_pivots() 3409 if pivots: 3410 table.set("pivots", pivots) 3411 3412 return table 3413 3414 def _parse_table( 3415 self, 3416 schema: bool = False, 3417 joins: bool = False, 3418 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3419 parse_bracket: bool = False, 3420 is_db_reference: bool = False, 3421 parse_partition: bool = False, 3422 ) -> t.Optional[exp.Expression]: 3423 lateral = self._parse_lateral() 3424 if lateral: 3425 return lateral 3426 3427 unnest = self._parse_unnest() 3428 if unnest: 3429 return unnest 3430 3431 values = self._parse_derived_table_values() 3432 if values: 3433 return values 3434 3435 subquery = self._parse_select(table=True) 3436 if subquery: 3437 if not subquery.args.get("pivots"): 3438 subquery.set("pivots", self._parse_pivots()) 3439 return subquery 3440 3441 bracket = parse_bracket and self._parse_bracket(None) 3442 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3443 3444 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3445 self._parse_table 3446 ) 3447 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3448 3449 only = self._match(TokenType.ONLY) 3450 3451 this = t.cast( 3452 exp.Expression, 3453 bracket 3454 or rows_from 3455 or self._parse_bracket( 3456 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3457 ), 3458 ) 3459 3460 if only: 3461 this.set("only", only) 3462 3463 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3464 self._match_text_seq("*") 3465 3466 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3467 if parse_partition and self._match(TokenType.PARTITION, 
advance=False): 3468 this.set("partition", self._parse_partition()) 3469 3470 if schema: 3471 return self._parse_schema(this=this) 3472 3473 version = self._parse_version() 3474 3475 if version: 3476 this.set("version", version) 3477 3478 if self.dialect.ALIAS_POST_TABLESAMPLE: 3479 table_sample = self._parse_table_sample() 3480 3481 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3482 if alias: 3483 this.set("alias", alias) 3484 3485 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3486 return self.expression( 3487 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3488 ) 3489 3490 this.set("hints", self._parse_table_hints()) 3491 3492 if not this.args.get("pivots"): 3493 this.set("pivots", self._parse_pivots()) 3494 3495 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3496 table_sample = self._parse_table_sample() 3497 3498 if table_sample: 3499 table_sample.set("this", this) 3500 this = table_sample 3501 3502 if joins: 3503 for join in self._parse_joins(): 3504 this.append("joins", join) 3505 3506 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3507 this.set("ordinality", True) 3508 this.set("alias", self._parse_table_alias()) 3509 3510 return this 3511 3512 def _parse_version(self) -> t.Optional[exp.Version]: 3513 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3514 this = "TIMESTAMP" 3515 elif self._match(TokenType.VERSION_SNAPSHOT): 3516 this = "VERSION" 3517 else: 3518 return None 3519 3520 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3521 kind = self._prev.text.upper() 3522 start = self._parse_bitwise() 3523 self._match_texts(("TO", "AND")) 3524 end = self._parse_bitwise() 3525 expression: t.Optional[exp.Expression] = self.expression( 3526 exp.Tuple, expressions=[start, end] 3527 ) 3528 elif self._match_text_seq("CONTAINED", "IN"): 3529 kind = "CONTAINED IN" 3530 expression = self.expression( 3531 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3532 ) 3533 elif self._match(TokenType.ALL): 3534 kind = "ALL" 3535 expression = None 3536 else: 3537 self._match_text_seq("AS", "OF") 3538 kind = "AS OF" 3539 expression = self._parse_type() 3540 3541 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3542 3543 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3544 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3545 index = self._index 3546 historical_data = None 3547 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3548 this = self._prev.text.upper() 3549 kind = ( 3550 self._match(TokenType.L_PAREN) 3551 and self._match_texts(self.HISTORICAL_DATA_KIND) 3552 and self._prev.text.upper() 3553 ) 3554 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3555 3556 if expression: 3557 self._match_r_paren() 3558 historical_data = self.expression( 3559 exp.HistoricalData, this=this, kind=kind, expression=expression 3560 ) 3561 else: 3562 self._retreat(index) 3563 3564 return historical_data 3565 3566 def _parse_changes(self) -> t.Optional[exp.Changes]: 3567 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3568 return None 3569 3570 information = self._parse_var(any_token=True) 3571 self._match_r_paren() 3572 3573 return self.expression( 3574 exp.Changes, 3575 information=information, 3576 at_before=self._parse_historical_data(), 3577 end=self._parse_historical_data(), 3578 ) 3579 3580 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3581 if not 
self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )
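    # [Editor's note: illustrative example, added; not part of the upstream source.]
    # _parse_table_sample above produces exp.TableSample. A minimal sketch of the
    # end-to-end effect, assuming sqlglot's public parse_one API:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> e = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)")
    #   >>> sample = e.find(exp.TableSample)
    #   >>> sample.args.get("percent")  # expected: the number literal 10
    #
    # Whether a bare number is read as a PERCENT or a SIZE sample hinges on
    # dialect.TABLESAMPLE_SIZE_IS_PERCENT, per the branching above.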
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_assignment()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
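    # [Editor's note: illustrative example, added; not part of the upstream source.]
    # A hedged sketch of _parse_pivot in action, assuming Snowflake-style syntax:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sql = "SELECT * FROM t PIVOT(SUM(v) FOR k IN ('a', 'b'))"
    #   >>> pivot = sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot)
    #   >>> pivot.args["unpivot"], len(pivot.args["field"].expressions)  # expected: (False, 2)
    #
    # The synthesized output column names depend on PREFIXED_PIVOT_COLUMNS and
    # IDENTIFY_PIVOT_STRINGS, as the name-building loop above shows.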
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                else self._parse_assignment()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())
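    # [Editor's note: illustrative example, added; not part of the upstream source.]
    # _parse_group collects plain expressions, GROUPING SETS, ROLLUP, CUBE and
    # WITH TOTALS into a single exp.Group node. A minimal sketch, assuming parse_one:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> g = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").find(exp.Group)
    #   >>> bool(g.args.get("rollup"))  # expected: True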
    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if
self._match(TokenType.COMMA): 4010 offset = expression 4011 expression = self._parse_term() 4012 else: 4013 offset = None 4014 4015 limit_exp = self.expression( 4016 exp.Limit, 4017 this=this, 4018 expression=expression, 4019 offset=offset, 4020 comments=comments, 4021 expressions=self._parse_limit_by(), 4022 ) 4023 4024 return limit_exp 4025 4026 if self._match(TokenType.FETCH): 4027 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4028 direction = self._prev.text.upper() if direction else "FIRST" 4029 4030 count = self._parse_field(tokens=self.FETCH_TOKENS) 4031 percent = self._match(TokenType.PERCENT) 4032 4033 self._match_set((TokenType.ROW, TokenType.ROWS)) 4034 4035 only = self._match_text_seq("ONLY") 4036 with_ties = self._match_text_seq("WITH", "TIES") 4037 4038 if only and with_ties: 4039 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4040 4041 return self.expression( 4042 exp.Fetch, 4043 direction=direction, 4044 count=count, 4045 percent=percent, 4046 with_ties=with_ties, 4047 ) 4048 4049 return this 4050 4051 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4052 if not self._match(TokenType.OFFSET): 4053 return this 4054 4055 count = self._parse_term() 4056 self._match_set((TokenType.ROW, TokenType.ROWS)) 4057 4058 return self.expression( 4059 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4060 ) 4061 4062 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4063 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4064 4065 def _parse_locks(self) -> t.List[exp.Lock]: 4066 locks = [] 4067 while True: 4068 if self._match_text_seq("FOR", "UPDATE"): 4069 update = True 4070 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4071 "LOCK", "IN", "SHARE", "MODE" 4072 ): 4073 update = False 4074 else: 4075 break 4076 4077 expressions = None 4078 if self._match_text_seq("OF"): 4079 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4080 4081 wait: t.Optional[bool | exp.Expression] = None 4082 if self._match_text_seq("NOWAIT"): 4083 wait = True 4084 elif self._match_text_seq("WAIT"): 4085 wait = self._parse_primary() 4086 elif self._match_text_seq("SKIP", "LOCKED"): 4087 wait = False 4088 4089 locks.append( 4090 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4091 ) 4092 4093 return locks 4094 4095 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4096 while this and self._match_set(self.SET_OPERATIONS): 4097 token_type = self._prev.token_type 4098 4099 if token_type == TokenType.UNION: 4100 operation: t.Type[exp.SetOperation] = exp.Union 4101 elif token_type == TokenType.EXCEPT: 4102 operation = exp.Except 4103 else: 4104 operation = exp.Intersect 4105 4106 comments = self._prev.comments 4107 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4108 by_name = self._match_text_seq("BY", "NAME") 4109 expression = self._parse_select(nested=True, parse_set_operation=False) 4110 4111 this = self.expression( 4112 operation, 4113 comments=comments, 4114 this=this, 4115 distinct=distinct, 4116 by_name=by_name, 4117 expression=expression, 4118 ) 4119 4120 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4121 expression = this.expression 4122 4123 if expression: 4124 for arg in self.SET_OP_MODIFIERS: 4125 expr = expression.args.get(arg) 4126 if expr: 4127 this.set(arg, expr.pop()) 4128 
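        # [Editor's note: explanatory aside, added; not part of the upstream source.]
        # The surrounding loop folds chained set operators left-to-right, so
        # "x UNION y UNION ALL z" nests as Union(Union(x, y), z). A hedged sketch:
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> u = sqlglot.parse_one("SELECT 1 UNION SELECT 2 UNION ALL SELECT 3")
        #   >>> isinstance(u, exp.Union) and isinstance(u.this, exp.Union)  # expected: True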
4129 return this 4130 4131 def _parse_expression(self) -> t.Optional[exp.Expression]: 4132 return self._parse_alias(self._parse_assignment()) 4133 4134 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4135 this = self._parse_disjunction() 4136 4137 while self._match_set(self.ASSIGNMENT): 4138 this = self.expression( 4139 self.ASSIGNMENT[self._prev.token_type], 4140 this=this, 4141 comments=self._prev_comments, 4142 expression=self._parse_assignment(), 4143 ) 4144 4145 return this 4146 4147 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4148 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4149 4150 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4151 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4152 4153 def _parse_equality(self) -> t.Optional[exp.Expression]: 4154 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4155 4156 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4157 return self._parse_tokens(self._parse_range, self.COMPARISON) 4158 4159 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4160 this = this or self._parse_bitwise() 4161 negate = self._match(TokenType.NOT) 4162 4163 if self._match_set(self.RANGE_PARSERS): 4164 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4165 if not expression: 4166 return this 4167 4168 this = expression 4169 elif self._match(TokenType.ISNULL): 4170 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4171 4172 # Postgres supports ISNULL and NOTNULL for conditions. 4173 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4174 if self._match(TokenType.NOTNULL): 4175 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4176 this = self.expression(exp.Not, this=this) 4177 4178 if negate: 4179 this = self.expression(exp.Not, this=this) 4180 4181 if self._match(TokenType.IS): 4182 this = self._parse_is(this) 4183 4184 return this 4185 4186 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4187 index = self._index - 1 4188 negate = self._match(TokenType.NOT) 4189 4190 if self._match_text_seq("DISTINCT", "FROM"): 4191 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4192 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4193 4194 expression = self._parse_null() or self._parse_boolean() 4195 if not expression: 4196 self._retreat(index) 4197 return None 4198 4199 this = self.expression(exp.Is, this=this, expression=expression) 4200 return self.expression(exp.Not, this=this) if negate else this 4201 4202 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4203 unnest = self._parse_unnest(with_alias=False) 4204 if unnest: 4205 this = self.expression(exp.In, this=this, unnest=unnest) 4206 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4207 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4208 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4209 4210 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4211 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4212 else: 4213 this = self.expression(exp.In, this=this, expressions=expressions) 4214 4215 if matched_l_paren: 4216 self._match_r_paren(this) 4217 elif not self._match(TokenType.R_BRACKET, expression=this): 4218 self.raise_error("Expecting ]") 4219 else: 4220 this = self.expression(exp.In, this=this, 
field=self._parse_field()) 4221 4222 return this 4223 4224 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4225 low = self._parse_bitwise() 4226 self._match(TokenType.AND) 4227 high = self._parse_bitwise() 4228 return self.expression(exp.Between, this=this, low=low, high=high) 4229 4230 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4231 if not self._match(TokenType.ESCAPE): 4232 return this 4233 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4234 4235 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4236 index = self._index 4237 4238 if not self._match(TokenType.INTERVAL) and match_interval: 4239 return None 4240 4241 if self._match(TokenType.STRING, advance=False): 4242 this = self._parse_primary() 4243 else: 4244 this = self._parse_term() 4245 4246 if not this or ( 4247 isinstance(this, exp.Column) 4248 and not this.table 4249 and not this.this.quoted 4250 and this.name.upper() == "IS" 4251 ): 4252 self._retreat(index) 4253 return None 4254 4255 unit = self._parse_function() or ( 4256 not self._match(TokenType.ALIAS, advance=False) 4257 and self._parse_var(any_token=True, upper=True) 4258 ) 4259 4260 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4261 # each INTERVAL expression into this canonical form so it's easy to transpile 4262 if this and this.is_number: 4263 this = exp.Literal.string(this.to_py()) 4264 elif this and this.is_string: 4265 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4266 if len(parts) == 1: 4267 if unit: 4268 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4269 self._retreat(self._index - 1) 4270 4271 this = exp.Literal.string(parts[0][0]) 4272 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4273 4274 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4275 unit = self.expression( 4276 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4277 ) 4278 4279 interval = self.expression(exp.Interval, this=this, unit=unit) 4280 4281 index = self._index 4282 self._match(TokenType.PLUS) 4283 4284 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4285 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4286 return self.expression( 4287 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4288 ) 4289 4290 self._retreat(index) 4291 return interval 4292 4293 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4294 this = self._parse_term() 4295 4296 while True: 4297 if self._match_set(self.BITWISE): 4298 this = self.expression( 4299 self.BITWISE[self._prev.token_type], 4300 this=this, 4301 expression=self._parse_term(), 4302 ) 4303 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4304 this = self.expression( 4305 exp.DPipe, 4306 this=this, 4307 expression=self._parse_term(), 4308 safe=not self.dialect.STRICT_STRING_CONCAT, 4309 ) 4310 elif self._match(TokenType.DQMARK): 4311 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4312 elif self._match_pair(TokenType.LT, TokenType.LT): 4313 this = self.expression( 4314 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4315 ) 4316 elif self._match_pair(TokenType.GT, TokenType.GT): 4317 this = self.expression( 4318 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4319 ) 4320 else: 4321 break 4322 4323 return this 4324 4325 def _parse_term(self) -> t.Optional[exp.Expression]: 4326 return self._parse_tokens(self._parse_factor, self.TERM) 4327 4328 def _parse_factor(self) -> t.Optional[exp.Expression]: 4329 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4330 this = parse_method() 4331 4332 while self._match_set(self.FACTOR): 4333 klass = self.FACTOR[self._prev.token_type] 4334 comments = self._prev_comments 4335 expression = parse_method() 4336 4337 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4338 self._retreat(self._index - 1) 4339 return this 4340 4341 this = self.expression(klass, this=this, comments=comments, expression=expression) 4342 4343 if isinstance(this, exp.Div): 4344 this.args["typed"] = self.dialect.TYPED_DIVISION 4345 this.args["safe"] = self.dialect.SAFE_DIVISION 4346 4347 return this 4348 4349 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4350 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4351 4352 def _parse_unary(self) -> t.Optional[exp.Expression]: 4353 if self._match_set(self.UNARY_PARSERS): 4354 return self.UNARY_PARSERS[self._prev.token_type](self) 4355 return self._parse_at_time_zone(self._parse_type()) 4356 4357 def _parse_type( 4358 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4359 ) -> t.Optional[exp.Expression]: 4360 interval = parse_interval and self._parse_interval() 4361 if interval: 4362 return interval 4363 4364 index = self._index 4365 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4366 4367 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if
type_token == TokenType.OBJECT_IDENTIFIER: 4461 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4462 4463 # https://materialize.com/docs/sql/types/map/ 4464 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4465 key_type = self._parse_types( 4466 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4467 ) 4468 if not self._match(TokenType.FARROW): 4469 self._retreat(index) 4470 return None 4471 4472 value_type = self._parse_types( 4473 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4474 ) 4475 if not self._match(TokenType.R_BRACKET): 4476 self._retreat(index) 4477 return None 4478 4479 return exp.DataType( 4480 this=exp.DataType.Type.MAP, 4481 expressions=[key_type, value_type], 4482 nested=True, 4483 prefix=prefix, 4484 ) 4485 4486 nested = type_token in self.NESTED_TYPE_TOKENS 4487 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4488 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4489 expressions = None 4490 maybe_func = False 4491 4492 if self._match(TokenType.L_PAREN): 4493 if is_struct: 4494 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4495 elif nested: 4496 expressions = self._parse_csv( 4497 lambda: self._parse_types( 4498 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4499 ) 4500 ) 4501 elif type_token in self.ENUM_TYPE_TOKENS: 4502 expressions = self._parse_csv(self._parse_equality) 4503 elif is_aggregate: 4504 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4505 any_token=False, tokens=(TokenType.VAR,) 4506 ) 4507 if not func_or_ident or not self._match(TokenType.COMMA): 4508 return None 4509 expressions = self._parse_csv( 4510 lambda: self._parse_types( 4511 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4512 ) 4513 ) 4514 expressions.insert(0, func_or_ident) 4515 else: 4516 expressions = self._parse_csv(self._parse_type_size) 4517 4518 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4519 if type_token == TokenType.VECTOR and len(expressions) == 2: 4520 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4521 4522 if not expressions or not self._match(TokenType.R_PAREN): 4523 self._retreat(index) 4524 return None 4525 4526 maybe_func = True 4527 4528 values: t.Optional[t.List[exp.Expression]] = None 4529 4530 if nested and self._match(TokenType.LT): 4531 if is_struct: 4532 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4533 else: 4534 expressions = self._parse_csv( 4535 lambda: self._parse_types( 4536 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4537 ) 4538 ) 4539 4540 if not self._match(TokenType.GT): 4541 self.raise_error("Expecting >") 4542 4543 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4544 values = self._parse_csv(self._parse_assignment) 4545 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4546 4547 if type_token in self.TIMESTAMPS: 4548 if self._match_text_seq("WITH", "TIME", "ZONE"): 4549 maybe_func = False 4550 tz_type = ( 4551 exp.DataType.Type.TIMETZ 4552 if type_token in self.TIMES 4553 else exp.DataType.Type.TIMESTAMPTZ 4554 ) 4555 this = exp.DataType(this=tz_type, expressions=expressions) 4556 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4557 maybe_func = False 4558 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4559 elif self._match_text_seq("WITHOUT", 
"TIME", "ZONE"): 4560 maybe_func = False 4561 elif type_token == TokenType.INTERVAL: 4562 unit = self._parse_var(upper=True) 4563 if unit: 4564 if self._match_text_seq("TO"): 4565 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4566 4567 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4568 else: 4569 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4570 4571 if maybe_func and check_func: 4572 index2 = self._index 4573 peek = self._parse_string() 4574 4575 if not peek: 4576 self._retreat(index) 4577 return None 4578 4579 self._retreat(index2) 4580 4581 if not this: 4582 if self._match_text_seq("UNSIGNED"): 4583 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4584 if not unsigned_type_token: 4585 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4586 4587 type_token = unsigned_type_token or type_token 4588 4589 this = exp.DataType( 4590 this=exp.DataType.Type[type_token.value], 4591 expressions=expressions, 4592 nested=nested, 4593 prefix=prefix, 4594 ) 4595 4596 # Empty arrays/structs are allowed 4597 if values is not None: 4598 cls = exp.Struct if is_struct else exp.Array 4599 this = exp.cast(cls(expressions=values), this, copy=False) 4600 4601 elif expressions: 4602 this.set("expressions", expressions) 4603 4604 # https://materialize.com/docs/sql/types/list/#type-name 4605 while self._match(TokenType.LIST): 4606 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4607 4608 index = self._index 4609 4610 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4611 matched_array = self._match(TokenType.ARRAY) 4612 4613 while self._curr: 4614 matched_l_bracket = self._match(TokenType.L_BRACKET) 4615 if not matched_l_bracket and not matched_array: 4616 break 4617 4618 matched_array = False 4619 values = self._parse_csv(self._parse_assignment) or None 4620 if ( 4621 values 4622 and not schema 4623 and this.is_type(exp.DataType.Type.ARRAY, exp.DataType.Type.MAP) 4624 ): 4625 self._retreat(index) 4626 break 4627 4628 this = exp.DataType( 4629 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4630 ) 4631 self._match(TokenType.R_BRACKET) 4632 4633 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4634 converter = self.TYPE_CONVERTERS.get(this.this) 4635 if converter: 4636 this = converter(t.cast(exp.DataType, this)) 4637 4638 return this 4639 4640 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4641 index = self._index 4642 4643 if ( 4644 self._curr 4645 and self._next 4646 and self._curr.token_type in self.TYPE_TOKENS 4647 and self._next.token_type in self.TYPE_TOKENS 4648 ): 4649 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4650 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4651 this = self._parse_id_var() 4652 else: 4653 this = ( 4654 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4655 or self._parse_id_var() 4656 ) 4657 4658 self._match(TokenType.COLON) 4659 4660 if ( 4661 type_required 4662 and not isinstance(this, exp.DataType) 4663 and not self._match_set(self.TYPE_TOKENS, advance=False) 4664 ): 4665 self._retreat(index) 4666 return self._parse_types() 4667 4668 return self._parse_column_def(this) 4669 4670 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4671 if not self._match_text_seq("AT", "TIME", "ZONE"): 4672 return this 4673 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4674 4675 def _parse_column(self) -> t.Optional[exp.Expression]: 4676 this = self._parse_column_reference() 4677 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4678 4679 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4680 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4681 4682 return column 4683 4684 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4685 this = self._parse_field() 4686 if ( 4687 not this 4688 and self._match(TokenType.VALUES, advance=False) 4689 and self.VALUES_FOLLOWED_BY_PAREN 4690 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4691 ): 4692 this = self._parse_id_var() 4693 4694 if isinstance(this, exp.Identifier): 4695 # We bubble up comments from the Identifier to the Column 4696 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4697 4698 return this 4699 4700 def _parse_colon_as_variant_extract( 4701 self, this: t.Optional[exp.Expression] 4702 ) -> t.Optional[exp.Expression]: 4703 casts = [] 4704 json_path = [] 4705 4706 while self._match(TokenType.COLON): 4707 start_index = self._index 4708 4709 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4710 path = self._parse_column_ops( 4711 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4712 ) 4713 4714 # The cast :: operator has a lower precedence than the extraction operator :, so 4715 # we rearrange the AST appropriately to avoid casting the JSON path 4716 while isinstance(path, exp.Cast): 4717 casts.append(path.to) 4718 path = path.this 4719 4720 if casts: 4721 dcolon_offset = next( 4722 i 4723 for i, t in enumerate(self._tokens[start_index:]) 4724 if t.token_type == TokenType.DCOLON 4725 ) 4726 end_token = self._tokens[start_index + dcolon_offset - 1] 4727 else: 4728 end_token = self._prev 4729 4730 if path: 4731 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4732 4733 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4734 # Databricks transforms it back to the colon/dot notation 4735 if json_path: 4736 this = self.expression( 4737 exp.JSONExtract, 4738 this=this, 4739 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4740 variant_extract=True, 4741 ) 4742 4743 while casts: 4744 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4745 4746 return this 4747 4748 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4749 return self._parse_types() 4750 4751 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4752 this = self._parse_bracket(this) 4753 4754 while 
self._match_set(self.COLUMN_OPERATORS): 4755 op_token = self._prev.token_type 4756 op = self.COLUMN_OPERATORS.get(op_token) 4757 4758 if op_token == TokenType.DCOLON: 4759 field = self._parse_dcolon() 4760 if not field: 4761 self.raise_error("Expected type") 4762 elif op and self._curr: 4763 field = self._parse_column_reference() 4764 else: 4765 field = self._parse_field(any_token=True, anonymous_func=True) 4766 4767 if isinstance(field, exp.Func) and this: 4768 # bigquery allows function calls like x.y.count(...) 4769 # SAFE.SUBSTR(...) 4770 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4771 this = exp.replace_tree( 4772 this, 4773 lambda n: ( 4774 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4775 if n.table 4776 else n.this 4777 ) 4778 if isinstance(n, exp.Column) 4779 else n, 4780 ) 4781 4782 if op: 4783 this = op(self, this, field) 4784 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4785 this = self.expression( 4786 exp.Column, 4787 this=field, 4788 table=this.this, 4789 db=this.args.get("table"), 4790 catalog=this.args.get("db"), 4791 ) 4792 else: 4793 this = self.expression(exp.Dot, this=this, expression=field) 4794 4795 this = self._parse_bracket(this) 4796 4797 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4798 4799 def _parse_primary(self) -> t.Optional[exp.Expression]: 4800 if self._match_set(self.PRIMARY_PARSERS): 4801 token_type = self._prev.token_type 4802 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4803 4804 if token_type == TokenType.STRING: 4805 expressions = [primary] 4806 while self._match(TokenType.STRING): 4807 expressions.append(exp.Literal.string(self._prev.text)) 4808 4809 if len(expressions) > 1: 4810 return self.expression(exp.Concat, expressions=expressions) 4811 4812 return primary 4813 4814 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4815 return exp.Literal.number(f"0.{self._prev.text}") 4816 4817 if self._match(TokenType.L_PAREN): 4818 comments = self._prev_comments 4819 query = self._parse_select() 4820 4821 if query: 4822 expressions = [query] 4823 else: 4824 expressions = self._parse_expressions() 4825 4826 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4827 4828 if not this and self._match(TokenType.R_PAREN, advance=False): 4829 this = self.expression(exp.Tuple) 4830 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4831 this = self._parse_subquery(this=this, parse_alias=False) 4832 elif isinstance(this, exp.Subquery): 4833 this = self._parse_subquery( 4834 this=self._parse_set_operations(this), parse_alias=False 4835 ) 4836 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4837 this = self.expression(exp.Tuple, expressions=expressions) 4838 else: 4839 this = self.expression(exp.Paren, this=this) 4840 4841 if this: 4842 this.add_comments(comments) 4843 4844 self._match_r_paren(expression=this) 4845 return this 4846 4847 return None 4848 4849 def _parse_field( 4850 self, 4851 any_token: bool = False, 4852 tokens: t.Optional[t.Collection[TokenType]] = None, 4853 anonymous_func: bool = False, 4854 ) -> t.Optional[exp.Expression]: 4855 if anonymous_func: 4856 field = ( 4857 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4858 or self._parse_primary() 4859 ) 4860 else: 4861 field = self._parse_primary() or self._parse_function( 4862 anonymous=anonymous_func, any_token=any_token 4863 ) 4864 return field or 
self._parse_id_var(any_token=any_token, tokens=tokens) 4865 4866 def _parse_function( 4867 self, 4868 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4869 anonymous: bool = False, 4870 optional_parens: bool = True, 4871 any_token: bool = False, 4872 ) -> t.Optional[exp.Expression]: 4873 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4874 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4875 fn_syntax = False 4876 if ( 4877 self._match(TokenType.L_BRACE, advance=False) 4878 and self._next 4879 and self._next.text.upper() == "FN" 4880 ): 4881 self._advance(2) 4882 fn_syntax = True 4883 4884 func = self._parse_function_call( 4885 functions=functions, 4886 anonymous=anonymous, 4887 optional_parens=optional_parens, 4888 any_token=any_token, 4889 ) 4890 4891 if fn_syntax: 4892 self._match(TokenType.R_BRACE) 4893 4894 return func 4895 4896 def _parse_function_call( 4897 self, 4898 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4899 anonymous: bool = False, 4900 optional_parens: bool = True, 4901 any_token: bool = False, 4902 ) -> t.Optional[exp.Expression]: 4903 if not self._curr: 4904 return None 4905 4906 comments = self._curr.comments 4907 token_type = self._curr.token_type 4908 this = self._curr.text 4909 upper = this.upper() 4910 4911 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4912 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4913 self._advance() 4914 return self._parse_window(parser(self)) 4915 4916 if not self._next or self._next.token_type != TokenType.L_PAREN: 4917 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4918 self._advance() 4919 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4920 4921 return None 4922 4923 if any_token: 4924 if token_type in self.RESERVED_TOKENS: 4925 return None 4926 elif token_type not in self.FUNC_TOKENS: 4927 return None 4928 4929 self._advance(2) 4930 4931 parser = self.FUNCTION_PARSERS.get(upper) 4932 if parser and not anonymous: 4933 this = parser(self) 4934 else: 4935 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4936 4937 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4938 this = self.expression(subquery_predicate, this=self._parse_select()) 4939 self._match_r_paren() 4940 return this 4941 4942 if functions is None: 4943 functions = self.FUNCTIONS 4944 4945 function = functions.get(upper) 4946 4947 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4948 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4949 4950 if alias: 4951 args = self._kv_to_prop_eq(args) 4952 4953 if function and not anonymous: 4954 if "dialect" in function.__code__.co_varnames: 4955 func = function(args, dialect=self.dialect) 4956 else: 4957 func = function(args) 4958 4959 func = self.validate_expression(func, args) 4960 if not self.dialect.NORMALIZE_FUNCTIONS: 4961 func.meta["name"] = this 4962 4963 this = func 4964 else: 4965 if token_type == TokenType.IDENTIFIER: 4966 this = exp.Identifier(this=this, quoted=True) 4967 this = self.expression(exp.Anonymous, this=this, expressions=args) 4968 4969 if isinstance(this, exp.Expression): 4970 this.add_comments(comments) 4971 4972 self._match_r_paren(this) 4973 return self._parse_window(this) 4974 4975 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4976 transformed = [] 4977 4978 for e in expressions: 4979 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4980 if isinstance(e, 
exp.Alias): 4981 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4982 4983 if not isinstance(e, exp.PropertyEQ): 4984 e = self.expression( 4985 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4986 ) 4987 4988 if isinstance(e.this, exp.Column): 4989 e.this.replace(e.this.this) 4990 4991 transformed.append(e) 4992 4993 return transformed 4994 4995 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4996 return self._parse_column_def(self._parse_id_var()) 4997 4998 def _parse_user_defined_function( 4999 self, kind: t.Optional[TokenType] = None 5000 ) -> t.Optional[exp.Expression]: 5001 this = self._parse_id_var() 5002 5003 while self._match(TokenType.DOT): 5004 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5005 5006 if not self._match(TokenType.L_PAREN): 5007 return this 5008 5009 expressions = self._parse_csv(self._parse_function_parameter) 5010 self._match_r_paren() 5011 return self.expression( 5012 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5013 ) 5014 5015 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5016 literal = self._parse_primary() 5017 if literal: 5018 return self.expression(exp.Introducer, this=token.text, expression=literal) 5019 5020 return self.expression(exp.Identifier, this=token.text) 5021 5022 def _parse_session_parameter(self) -> exp.SessionParameter: 5023 kind = None 5024 this = self._parse_id_var() or self._parse_primary() 5025 5026 if this and self._match(TokenType.DOT): 5027 kind = this.name 5028 this = self._parse_var() or self._parse_primary() 5029 5030 return self.expression(exp.SessionParameter, this=this, kind=kind) 5031 5032 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5033 return self._parse_id_var() 5034 5035 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5036 index = self._index 5037 5038 if self._match(TokenType.L_PAREN): 5039 expressions = t.cast( 5040 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5041 ) 5042 5043 if not self._match(TokenType.R_PAREN): 5044 self._retreat(index) 5045 else: 5046 expressions = [self._parse_lambda_arg()] 5047 5048 if self._match_set(self.LAMBDAS): 5049 return self.LAMBDAS[self._prev.token_type](self, expressions) 5050 5051 self._retreat(index) 5052 5053 this: t.Optional[exp.Expression] 5054 5055 if self._match(TokenType.DISTINCT): 5056 this = self.expression( 5057 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5058 ) 5059 else: 5060 this = self._parse_select_or_expression(alias=alias) 5061 5062 return self._parse_limit( 5063 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5064 ) 5065 5066 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5067 index = self._index 5068 if not self._match(TokenType.L_PAREN): 5069 return this 5070 5071 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5072 # expr can be of both types 5073 if self._match_set(self.SELECT_START_TOKENS): 5074 self._retreat(index) 5075 return this 5076 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5077 self._match_r_paren() 5078 return self.expression(exp.Schema, this=this, expressions=args) 5079 5080 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5081 return self._parse_column_def(self._parse_field(any_token=True)) 5082 5083 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5084 # column defs are not really columns, they're identifiers 5085 if isinstance(this, exp.Column): 5086 this = this.this 5087 5088 kind = self._parse_types(schema=True) 5089 5090 if self._match_text_seq("FOR", "ORDINALITY"): 5091 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5092 5093 constraints: t.List[exp.Expression] = [] 5094 5095 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5096 ("ALIAS", "MATERIALIZED") 5097 ): 5098 persisted = self._prev.text.upper() == "MATERIALIZED" 5099 constraints.append( 5100 self.expression( 5101 exp.ComputedColumnConstraint, 5102 this=self._parse_assignment(), 5103 persisted=persisted or self._match_text_seq("PERSISTED"), 5104 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5105 ) 5106 ) 5107 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5108 self._match(TokenType.ALIAS) 5109 constraints.append( 5110 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5111 ) 5112 5113 while True: 5114 constraint = self._parse_column_constraint() 5115 if not constraint: 5116 break 5117 constraints.append(constraint) 5118 5119 if not kind and not constraints: 5120 return this 5121 5122 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5123 5124 def _parse_auto_increment( 5125 self, 5126 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5127 start = None 5128 increment = None 5129 5130 if self._match(TokenType.L_PAREN, advance=False): 5131 args = self._parse_wrapped_csv(self._parse_bitwise) 5132 start = seq_get(args, 0) 5133 increment = seq_get(args, 1) 5134 elif self._match_text_seq("START"): 5135 start = self._parse_bitwise() 5136 self._match_text_seq("INCREMENT") 5137 increment = self._parse_bitwise() 5138 5139 if start and increment: 5140 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5141 5142 return exp.AutoIncrementColumnConstraint() 5143 5144 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5145 if not self._match_text_seq("REFRESH"): 5146 self._retreat(self._index - 1) 5147 return None 5148 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5149 5150 def _parse_compress(self) -> exp.CompressColumnConstraint: 5151 if self._match(TokenType.L_PAREN, advance=False): 5152 return self.expression( 5153 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5154 ) 5155 5156 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5157 5158 def _parse_generated_as_identity( 5159 self, 5160 ) -> ( 5161 exp.GeneratedAsIdentityColumnConstraint 5162 | exp.ComputedColumnConstraint 5163 | exp.GeneratedAsRowColumnConstraint 5164 ): 5165 if self._match_text_seq("BY", "DEFAULT"): 5166 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5167 this = self.expression( 5168 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5169 ) 5170 else: 5171 self._match_text_seq("ALWAYS") 5172 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5173 5174 self._match(TokenType.ALIAS) 5175 5176 if self._match_text_seq("ROW"): 5177 start = self._match_text_seq("START") 5178 if not start: 5179 self._match(TokenType.END) 5180 hidden = self._match_text_seq("HIDDEN") 5181 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5182 5183 identity = self._match_text_seq("IDENTITY") 5184 5185 if self._match(TokenType.L_PAREN): 5186 if self._match(TokenType.START_WITH): 5187 this.set("start", self._parse_bitwise()) 5188 if self._match_text_seq("INCREMENT", "BY"): 5189 this.set("increment", self._parse_bitwise()) 5190 if self._match_text_seq("MINVALUE"): 5191 this.set("minvalue", self._parse_bitwise()) 5192 if self._match_text_seq("MAXVALUE"): 5193 this.set("maxvalue", self._parse_bitwise()) 5194 5195 if self._match_text_seq("CYCLE"): 5196 this.set("cycle", True) 5197 elif self._match_text_seq("NO", "CYCLE"): 5198 this.set("cycle", False) 5199 5200 if not identity: 5201 this.set("expression", self._parse_range()) 5202 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5203 args = self._parse_csv(self._parse_bitwise) 5204 this.set("start", seq_get(args, 0)) 5205 this.set("increment", seq_get(args, 1)) 5206 5207 self._match_r_paren() 5208 5209 return this 5210 5211 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5212 self._match_text_seq("LENGTH") 5213 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5214 5215 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5216 if self._match_text_seq("NULL"): 5217 return self.expression(exp.NotNullColumnConstraint) 5218 if self._match_text_seq("CASESPECIFIC"): 5219 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5220 if self._match_text_seq("FOR", "REPLICATION"): 5221 return self.expression(exp.NotForReplicationColumnConstraint) 5222 return None 5223 5224 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5225 if self._match(TokenType.CONSTRAINT): 5226 this = self._parse_id_var() 5227 else: 5228 this = None 5229 5230 if self._match_texts(self.CONSTRAINT_PARSERS): 5231 return self.expression( 5232 exp.ColumnConstraint, 5233 this=this, 5234 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5235 ) 5236 5237 return this 5238 5239 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5240 if not self._match(TokenType.CONSTRAINT): 5241 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5242 5243 return self.expression( 5244 exp.Constraint, 5245 this=self._parse_id_var(), 5246 expressions=self._parse_unnamed_constraints(), 5247 ) 5248 5249 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5250 constraints = [] 5251 while True: 5252 constraint = self._parse_unnamed_constraint() or self._parse_function() 5253 if not constraint: 5254 break 5255 constraints.append(constraint) 5256 5257 return constraints 5258 5259 def _parse_unnamed_constraint( 5260 self, constraints: t.Optional[t.Collection[str]] = None 5261 ) -> t.Optional[exp.Expression]: 5262 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5263 constraints or self.CONSTRAINT_PARSERS 5264 ): 5265 return None 5266 5267 constraint = self._prev.text.upper() 5268 if constraint not in self.CONSTRAINT_PARSERS: 5269 
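        # [Editor's note: explanatory aside, added; not part of the upstream source.]
        # CONSTRAINT_PARSERS maps upper-cased keywords (e.g. "PRIMARY KEY", "UNIQUE",
        # "CHECK") to parser callbacks; dialects extend the mapping to accept
        # vendor-specific constraints. A minimal sketch of the end-to-end effect,
        # assuming parse_one:
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> ct = sqlglot.parse_one("CREATE TABLE t (x INT PRIMARY KEY)")
        #   >>> ct.find(exp.PrimaryKeyColumnConstraint) is not None  # expected: True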
self.raise_error(f"No parser found for schema constraint {constraint}.") 5270 5271 return self.CONSTRAINT_PARSERS[constraint](self) 5272 5273 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5274 return self._parse_id_var(any_token=False) 5275 5276 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5277 self._match_text_seq("KEY") 5278 return self.expression( 5279 exp.UniqueColumnConstraint, 5280 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5281 this=self._parse_schema(self._parse_unique_key()), 5282 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5283 on_conflict=self._parse_on_conflict(), 5284 ) 5285 5286 def _parse_key_constraint_options(self) -> t.List[str]: 5287 options = [] 5288 while True: 5289 if not self._curr: 5290 break 5291 5292 if self._match(TokenType.ON): 5293 action = None 5294 on = self._advance_any() and self._prev.text 5295 5296 if self._match_text_seq("NO", "ACTION"): 5297 action = "NO ACTION" 5298 elif self._match_text_seq("CASCADE"): 5299 action = "CASCADE" 5300 elif self._match_text_seq("RESTRICT"): 5301 action = "RESTRICT" 5302 elif self._match_pair(TokenType.SET, TokenType.NULL): 5303 action = "SET NULL" 5304 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5305 action = "SET DEFAULT" 5306 else: 5307 self.raise_error("Invalid key constraint") 5308 5309 options.append(f"ON {on} {action}") 5310 else: 5311 var = self._parse_var_from_options( 5312 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5313 ) 5314 if not var: 5315 break 5316 options.append(var.name) 5317 5318 return options 5319 5320 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5321 if match and not self._match(TokenType.REFERENCES): 5322 return None 5323 5324 expressions = None 5325 this = self._parse_table(schema=True) 5326 options = self._parse_key_constraint_options() 5327 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5328 5329 def _parse_foreign_key(self) -> exp.ForeignKey: 5330 expressions = self._parse_wrapped_id_vars() 5331 reference = self._parse_references() 5332 options = {} 5333 5334 while self._match(TokenType.ON): 5335 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5336 self.raise_error("Expected DELETE or UPDATE") 5337 5338 kind = self._prev.text.lower() 5339 5340 if self._match_text_seq("NO", "ACTION"): 5341 action = "NO ACTION" 5342 elif self._match(TokenType.SET): 5343 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5344 action = "SET " + self._prev.text.upper() 5345 else: 5346 self._advance() 5347 action = self._prev.text.upper() 5348 5349 options[kind] = action 5350 5351 return self.expression( 5352 exp.ForeignKey, 5353 expressions=expressions, 5354 reference=reference, 5355 **options, # type: ignore 5356 ) 5357 5358 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5359 return self._parse_field() 5360 5361 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5362 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5363 self._retreat(self._index - 1) 5364 return None 5365 5366 id_vars = self._parse_wrapped_id_vars() 5367 return self.expression( 5368 exp.PeriodForSystemTimeConstraint, 5369 this=seq_get(id_vars, 0), 5370 expression=seq_get(id_vars, 1), 5371 ) 5372 5373 def _parse_primary_key( 5374 self, wrapped_optional: bool = False, in_props: bool = False 5375 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5376 desc = ( 5377 self._match_set((TokenType.ASC, 
TokenType.DESC)) 5378 and self._prev.token_type == TokenType.DESC 5379 ) 5380 5381 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5382 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5383 5384 expressions = self._parse_wrapped_csv( 5385 self._parse_primary_key_part, optional=wrapped_optional 5386 ) 5387 options = self._parse_key_constraint_options() 5388 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5389 5390 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5391 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5392 5393 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5394 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5395 return this 5396 5397 bracket_kind = self._prev.token_type 5398 expressions = self._parse_csv( 5399 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5400 ) 5401 5402 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5403 self.raise_error("Expected ]") 5404 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5405 self.raise_error("Expected }") 5406 5407 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5408 if bracket_kind == TokenType.L_BRACE: 5409 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5410 elif not this: 5411 this = self.expression(exp.Array, expressions=expressions) 5412 else: 5413 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5414 if constructor_type: 5415 return self.expression(constructor_type, expressions=expressions) 5416 5417 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5418 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5419 5420 self._add_comments(this) 5421 return self._parse_bracket(this) 5422 5423 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5424 if self._match(TokenType.COLON): 5425 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5426 return this 5427 5428 def _parse_case(self) -> t.Optional[exp.Expression]: 5429 ifs = [] 5430 default = None 5431 5432 comments = self._prev_comments 5433 expression = self._parse_assignment() 5434 5435 while self._match(TokenType.WHEN): 5436 this = self._parse_assignment() 5437 self._match(TokenType.THEN) 5438 then = self._parse_assignment() 5439 ifs.append(self.expression(exp.If, this=this, true=then)) 5440 5441 if self._match(TokenType.ELSE): 5442 default = self._parse_assignment() 5443 5444 if not self._match(TokenType.END): 5445 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5446 default = exp.column("interval") 5447 else: 5448 self.raise_error("Expected END after CASE", self._prev) 5449 5450 return self.expression( 5451 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5452 ) 5453 5454 def _parse_if(self) -> t.Optional[exp.Expression]: 5455 if self._match(TokenType.L_PAREN): 5456 args = self._parse_csv(self._parse_assignment) 5457 this = self.validate_expression(exp.If.from_arg_list(args), args) 5458 self._match_r_paren() 5459 else: 5460 index = self._index - 1 5461 5462 if self.NO_PAREN_IF_COMMANDS and index == 0: 5463 return self._parse_as_command(self._prev) 5464 5465 condition = self._parse_assignment() 5466 5467 if not condition: 5468 
self._retreat(index) 5469 return None 5470 5471 self._match(TokenType.THEN) 5472 true = self._parse_assignment() 5473 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5474 self._match(TokenType.END) 5475 this = self.expression(exp.If, this=condition, true=true, false=false) 5476 5477 return this 5478 5479 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5480 if not self._match_text_seq("VALUE", "FOR"): 5481 self._retreat(self._index - 1) 5482 return None 5483 5484 return self.expression( 5485 exp.NextValueFor, 5486 this=self._parse_column(), 5487 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5488 ) 5489 5490 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5491 this = self._parse_function() or self._parse_var_or_string(upper=True) 5492 5493 if self._match(TokenType.FROM): 5494 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5495 5496 if not self._match(TokenType.COMMA): 5497 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5498 5499 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5500 5501 def _parse_gap_fill(self) -> exp.GapFill: 5502 self._match(TokenType.TABLE) 5503 this = self._parse_table() 5504 5505 self._match(TokenType.COMMA) 5506 args = [this, *self._parse_csv(self._parse_lambda)] 5507 5508 gap_fill = exp.GapFill.from_arg_list(args) 5509 return self.validate_expression(gap_fill, args) 5510 5511 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5512 this = self._parse_assignment() 5513 5514 if not self._match(TokenType.ALIAS): 5515 if self._match(TokenType.COMMA): 5516 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5517 5518 self.raise_error("Expected AS after CAST") 5519 5520 fmt = None 5521 to = self._parse_types() 5522 5523 if self._match(TokenType.FORMAT): 5524 fmt_string = self._parse_string() 5525 fmt = self._parse_at_time_zone(fmt_string) 5526 5527 if not to: 5528 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5529 if to.this in exp.DataType.TEMPORAL_TYPES: 5530 this = self.expression( 5531 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5532 this=this, 5533 format=exp.Literal.string( 5534 format_time( 5535 fmt_string.this if fmt_string else "", 5536 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5537 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5538 ) 5539 ), 5540 safe=safe, 5541 ) 5542 5543 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5544 this.set("zone", fmt.args["zone"]) 5545 return this 5546 elif not to: 5547 self.raise_error("Expected TYPE after CAST") 5548 elif isinstance(to, exp.Identifier): 5549 to = exp.DataType.build(to.name, udt=True) 5550 elif to.this == exp.DataType.Type.CHAR: 5551 if self._match(TokenType.CHARACTER_SET): 5552 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5553 5554 return self.expression( 5555 exp.Cast if strict else exp.TryCast, 5556 this=this, 5557 to=to, 5558 format=fmt, 5559 safe=safe, 5560 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5561 ) 5562 5563 def _parse_string_agg(self) -> exp.Expression: 5564 if self._match(TokenType.DISTINCT): 5565 args: t.List[t.Optional[exp.Expression]] = [ 5566 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5567 ] 5568 if self._match(TokenType.COMMA): 5569 args.extend(self._parse_csv(self._parse_assignment)) 5570 else: 5571 args 
= self._parse_csv(self._parse_assignment) # type: ignore 5572 5573 index = self._index 5574 if not self._match(TokenType.R_PAREN) and args: 5575 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5576 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5577 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5578 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5579 5580 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5581 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5582 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5583 if not self._match_text_seq("WITHIN", "GROUP"): 5584 self._retreat(index) 5585 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5586 5587 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5588 order = self._parse_order(this=seq_get(args, 0)) 5589 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5590 5591 def _parse_convert( 5592 self, strict: bool, safe: t.Optional[bool] = None 5593 ) -> t.Optional[exp.Expression]: 5594 this = self._parse_bitwise() 5595 5596 if self._match(TokenType.USING): 5597 to: t.Optional[exp.Expression] = self.expression( 5598 exp.CharacterSet, this=self._parse_var() 5599 ) 5600 elif self._match(TokenType.COMMA): 5601 to = self._parse_types() 5602 else: 5603 to = None 5604 5605 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5606 5607 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5608 """ 5609 There are generally two variants of the DECODE function: 5610 5611 - DECODE(bin, charset) 5612 - DECODE(expression, search, result [, search, result] ... [, default]) 5613 5614 The second variant will always be parsed into a CASE expression. Note that NULL 5615 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5616 instead of relying on pattern matching. 
5617 """ 5618 args = self._parse_csv(self._parse_assignment) 5619 5620 if len(args) < 3: 5621 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5622 5623 expression, *expressions = args 5624 if not expression: 5625 return None 5626 5627 ifs = [] 5628 for search, result in zip(expressions[::2], expressions[1::2]): 5629 if not search or not result: 5630 return None 5631 5632 if isinstance(search, exp.Literal): 5633 ifs.append( 5634 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5635 ) 5636 elif isinstance(search, exp.Null): 5637 ifs.append( 5638 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5639 ) 5640 else: 5641 cond = exp.or_( 5642 exp.EQ(this=expression.copy(), expression=search), 5643 exp.and_( 5644 exp.Is(this=expression.copy(), expression=exp.Null()), 5645 exp.Is(this=search.copy(), expression=exp.Null()), 5646 copy=False, 5647 ), 5648 copy=False, 5649 ) 5650 ifs.append(exp.If(this=cond, true=result)) 5651 5652 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5653 5654 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5655 self._match_text_seq("KEY") 5656 key = self._parse_column() 5657 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5658 self._match_text_seq("VALUE") 5659 value = self._parse_bitwise() 5660 5661 if not key and not value: 5662 return None 5663 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5664 5665 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5666 if not this or not self._match_text_seq("FORMAT", "JSON"): 5667 return this 5668 5669 return self.expression(exp.FormatJson, this=this) 5670 5671 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5672 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5673 for value in values: 5674 if self._match_text_seq(value, "ON", on): 5675 return f"{value} ON {on}" 5676 5677 return None 5678 5679 @t.overload 5680 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5681 5682 @t.overload 5683 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5684 5685 def _parse_json_object(self, agg=False): 5686 star = self._parse_star() 5687 expressions = ( 5688 [star] 5689 if star 5690 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5691 ) 5692 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5693 5694 unique_keys = None 5695 if self._match_text_seq("WITH", "UNIQUE"): 5696 unique_keys = True 5697 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5698 unique_keys = False 5699 5700 self._match_text_seq("KEYS") 5701 5702 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5703 self._parse_type() 5704 ) 5705 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5706 5707 return self.expression( 5708 exp.JSONObjectAgg if agg else exp.JSONObject, 5709 expressions=expressions, 5710 null_handling=null_handling, 5711 unique_keys=unique_keys, 5712 return_type=return_type, 5713 encoding=encoding, 5714 ) 5715 5716 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5717 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5718 if not self._match_text_seq("NESTED"): 5719 this = self._parse_id_var() 5720 kind = self._parse_types(allow_identifiers=False) 5721 nested = None 5722 else: 5723 this = None 5724 kind = None 5725 nested = True 5726 5727 path = self._match_text_seq("PATH") and self._parse_string() 5728 nested_schema = nested and self._parse_json_schema() 5729 5730 return self.expression( 5731 exp.JSONColumnDef, 5732 this=this, 5733 kind=kind, 5734 path=path, 5735 nested_schema=nested_schema, 5736 ) 5737 5738 def _parse_json_schema(self) -> exp.JSONSchema: 5739 self._match_text_seq("COLUMNS") 5740 return self.expression( 5741 exp.JSONSchema, 5742 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5743 ) 5744 5745 def _parse_json_table(self) -> exp.JSONTable: 5746 this = self._parse_format_json(self._parse_bitwise()) 5747 path = self._match(TokenType.COMMA) and self._parse_string() 5748 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5749 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5750 schema = self._parse_json_schema() 5751 5752 return exp.JSONTable( 5753 this=this, 5754 schema=schema, 5755 path=path, 5756 error_handling=error_handling, 5757 empty_handling=empty_handling, 5758 ) 5759 5760 def _parse_match_against(self) -> exp.MatchAgainst: 5761 expressions = self._parse_csv(self._parse_column) 5762 5763 self._match_text_seq(")", "AGAINST", "(") 5764 5765 this = self._parse_string() 5766 5767 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5768 modifier = "IN NATURAL LANGUAGE MODE" 5769 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5770 modifier = f"{modifier} WITH QUERY EXPANSION" 5771 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5772 modifier = "IN BOOLEAN MODE" 5773 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5774 modifier = "WITH QUERY EXPANSION" 5775 else: 5776 modifier = None 5777 5778 return self.expression( 5779 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5780 ) 5781 5782 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5783 def _parse_open_json(self) -> exp.OpenJSON: 5784 this = self._parse_bitwise() 5785 path = self._match(TokenType.COMMA) and self._parse_string() 5786 5787 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5788 this = self._parse_field(any_token=True) 5789 kind = self._parse_types() 5790 path = 
self._parse_string() 5791 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5792 5793 return self.expression( 5794 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5795 ) 5796 5797 expressions = None 5798 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5799 self._match_l_paren() 5800 expressions = self._parse_csv(_parse_open_json_column_def) 5801 5802 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5803 5804 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5805 args = self._parse_csv(self._parse_bitwise) 5806 5807 if self._match(TokenType.IN): 5808 return self.expression( 5809 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5810 ) 5811 5812 if haystack_first: 5813 haystack = seq_get(args, 0) 5814 needle = seq_get(args, 1) 5815 else: 5816 needle = seq_get(args, 0) 5817 haystack = seq_get(args, 1) 5818 5819 return self.expression( 5820 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5821 ) 5822 5823 def _parse_predict(self) -> exp.Predict: 5824 self._match_text_seq("MODEL") 5825 this = self._parse_table() 5826 5827 self._match(TokenType.COMMA) 5828 self._match_text_seq("TABLE") 5829 5830 return self.expression( 5831 exp.Predict, 5832 this=this, 5833 expression=self._parse_table(), 5834 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5835 ) 5836 5837 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5838 args = self._parse_csv(self._parse_table) 5839 return exp.JoinHint(this=func_name.upper(), expressions=args) 5840 5841 def _parse_substring(self) -> exp.Substring: 5842 # Postgres supports the form: substring(string [from int] [for int]) 5843 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5844 5845 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5846 5847 if self._match(TokenType.FROM): 5848 args.append(self._parse_bitwise()) 5849 if self._match(TokenType.FOR): 5850 if len(args) == 1: 5851 args.append(exp.Literal.number(1)) 5852 args.append(self._parse_bitwise()) 5853 5854 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5855 5856 def _parse_trim(self) -> exp.Trim: 5857 # https://www.w3resource.com/sql/character-functions/trim.php 5858 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5859 5860 position = None 5861 collation = None 5862 expression = None 5863 5864 if self._match_texts(self.TRIM_TYPES): 5865 position = self._prev.text.upper() 5866 5867 this = self._parse_bitwise() 5868 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5869 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5870 expression = self._parse_bitwise() 5871 5872 if invert_order: 5873 this, expression = expression, this 5874 5875 if self._match(TokenType.COLLATE): 5876 collation = self._parse_bitwise() 5877 5878 return self.expression( 5879 exp.Trim, this=this, position=position, expression=expression, collation=collation 5880 ) 5881 5882 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5883 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5884 5885 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5886 return self._parse_window(self._parse_id_var(), alias=True) 5887 5888 def _parse_respect_or_ignore_nulls( 5889 self, this: t.Optional[exp.Expression] 5890 ) -> t.Optional[exp.Expression]: 5891 if self._match_text_seq("IGNORE", "NULLS"): 
5892 return self.expression(exp.IgnoreNulls, this=this) 5893 if self._match_text_seq("RESPECT", "NULLS"): 5894 return self.expression(exp.RespectNulls, this=this) 5895 return this 5896 5897 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5898 if self._match(TokenType.HAVING): 5899 self._match_texts(("MAX", "MIN")) 5900 max = self._prev.text.upper() != "MIN" 5901 return self.expression( 5902 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5903 ) 5904 5905 return this 5906 5907 def _parse_window( 5908 self, this: t.Optional[exp.Expression], alias: bool = False 5909 ) -> t.Optional[exp.Expression]: 5910 func = this 5911 comments = func.comments if isinstance(func, exp.Expression) else None 5912 5913 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5914 self._match(TokenType.WHERE) 5915 this = self.expression( 5916 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5917 ) 5918 self._match_r_paren() 5919 5920 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5921 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5922 if self._match_text_seq("WITHIN", "GROUP"): 5923 order = self._parse_wrapped(self._parse_order) 5924 this = self.expression(exp.WithinGroup, this=this, expression=order) 5925 5926 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5927 # Some dialects choose to implement and some do not. 5928 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5929 5930 # There is some code above in _parse_lambda that handles 5931 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5932 5933 # The below changes handle 5934 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5935 5936 # Oracle allows both formats 5937 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5938 # and Snowflake chose to do the same for familiarity 5939 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5940 if isinstance(this, exp.AggFunc): 5941 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5942 5943 if ignore_respect and ignore_respect is not this: 5944 ignore_respect.replace(ignore_respect.this) 5945 this = self.expression(ignore_respect.__class__, this=this) 5946 5947 this = self._parse_respect_or_ignore_nulls(this) 5948 5949 # bigquery select from window x AS (partition by ...) 
5950 if alias: 5951 over = None 5952 self._match(TokenType.ALIAS) 5953 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5954 return this 5955 else: 5956 over = self._prev.text.upper() 5957 5958 if comments and isinstance(func, exp.Expression): 5959 func.pop_comments() 5960 5961 if not self._match(TokenType.L_PAREN): 5962 return self.expression( 5963 exp.Window, 5964 comments=comments, 5965 this=this, 5966 alias=self._parse_id_var(False), 5967 over=over, 5968 ) 5969 5970 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5971 5972 first = self._match(TokenType.FIRST) 5973 if self._match_text_seq("LAST"): 5974 first = False 5975 5976 partition, order = self._parse_partition_and_order() 5977 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5978 5979 if kind: 5980 self._match(TokenType.BETWEEN) 5981 start = self._parse_window_spec() 5982 self._match(TokenType.AND) 5983 end = self._parse_window_spec() 5984 5985 spec = self.expression( 5986 exp.WindowSpec, 5987 kind=kind, 5988 start=start["value"], 5989 start_side=start["side"], 5990 end=end["value"], 5991 end_side=end["side"], 5992 ) 5993 else: 5994 spec = None 5995 5996 self._match_r_paren() 5997 5998 window = self.expression( 5999 exp.Window, 6000 comments=comments, 6001 this=this, 6002 partition_by=partition, 6003 order=order, 6004 spec=spec, 6005 alias=window_alias, 6006 over=over, 6007 first=first, 6008 ) 6009 6010 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6011 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6012 return self._parse_window(window, alias=alias) 6013 6014 return window 6015 6016 def _parse_partition_and_order( 6017 self, 6018 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6019 return self._parse_partition_by(), self._parse_order() 6020 6021 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6022 self._match(TokenType.BETWEEN) 6023 6024 return { 6025 "value": ( 6026 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6027 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6028 or self._parse_bitwise() 6029 ), 6030 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6031 } 6032 6033 def _parse_alias( 6034 self, this: t.Optional[exp.Expression], explicit: bool = False 6035 ) -> t.Optional[exp.Expression]: 6036 any_token = self._match(TokenType.ALIAS) 6037 comments = self._prev_comments or [] 6038 6039 if explicit and not any_token: 6040 return this 6041 6042 if self._match(TokenType.L_PAREN): 6043 aliases = self.expression( 6044 exp.Aliases, 6045 comments=comments, 6046 this=this, 6047 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6048 ) 6049 self._match_r_paren(aliases) 6050 return aliases 6051 6052 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6053 self.STRING_ALIASES and self._parse_string_as_identifier() 6054 ) 6055 6056 if alias: 6057 comments.extend(alias.pop_comments()) 6058 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6059 column = this.this 6060 6061 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6062 if not this.comments and column and column.comments: 6063 this.comments = column.pop_comments() 6064 6065 return this 6066 6067 def _parse_id_var( 6068 self, 6069 any_token: bool = True, 6070 tokens: t.Optional[t.Collection[TokenType]] = None, 6071 ) -> t.Optional[exp.Expression]: 6072 expression = self._parse_identifier() 6073 if 
not expression and ( 6074 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6075 ): 6076 quoted = self._prev.token_type == TokenType.STRING 6077 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6078 6079 return expression 6080 6081 def _parse_string(self) -> t.Optional[exp.Expression]: 6082 if self._match_set(self.STRING_PARSERS): 6083 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6084 return self._parse_placeholder() 6085 6086 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6087 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6088 6089 def _parse_number(self) -> t.Optional[exp.Expression]: 6090 if self._match_set(self.NUMERIC_PARSERS): 6091 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6092 return self._parse_placeholder() 6093 6094 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6095 if self._match(TokenType.IDENTIFIER): 6096 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6097 return self._parse_placeholder() 6098 6099 def _parse_var( 6100 self, 6101 any_token: bool = False, 6102 tokens: t.Optional[t.Collection[TokenType]] = None, 6103 upper: bool = False, 6104 ) -> t.Optional[exp.Expression]: 6105 if ( 6106 (any_token and self._advance_any()) 6107 or self._match(TokenType.VAR) 6108 or (self._match_set(tokens) if tokens else False) 6109 ): 6110 return self.expression( 6111 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6112 ) 6113 return self._parse_placeholder() 6114 6115 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6116 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6117 self._advance() 6118 return self._prev 6119 return None 6120 6121 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6122 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6123 6124 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6125 return self._parse_primary() or self._parse_var(any_token=True) 6126 6127 def _parse_null(self) -> t.Optional[exp.Expression]: 6128 if self._match_set(self.NULL_TOKENS): 6129 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6130 return self._parse_placeholder() 6131 6132 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6133 if self._match(TokenType.TRUE): 6134 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6135 if self._match(TokenType.FALSE): 6136 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6137 return self._parse_placeholder() 6138 6139 def _parse_star(self) -> t.Optional[exp.Expression]: 6140 if self._match(TokenType.STAR): 6141 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6142 return self._parse_placeholder() 6143 6144 def _parse_parameter(self) -> exp.Parameter: 6145 this = self._parse_identifier() or self._parse_primary_or_var() 6146 return self.expression(exp.Parameter, this=this) 6147 6148 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6149 if self._match_set(self.PLACEHOLDER_PARSERS): 6150 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6151 if placeholder: 6152 return placeholder 6153 self._advance(-1) 6154 return None 6155 6156 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6157 if not self._match_texts(keywords): 6158 return None 6159 if self._match(TokenType.L_PAREN, 
advance=False): 6160 return self._parse_wrapped_csv(self._parse_expression) 6161 6162 expression = self._parse_expression() 6163 return [expression] if expression else None 6164 6165 def _parse_csv( 6166 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6167 ) -> t.List[exp.Expression]: 6168 parse_result = parse_method() 6169 items = [parse_result] if parse_result is not None else [] 6170 6171 while self._match(sep): 6172 self._add_comments(parse_result) 6173 parse_result = parse_method() 6174 if parse_result is not None: 6175 items.append(parse_result) 6176 6177 return items 6178 6179 def _parse_tokens( 6180 self, parse_method: t.Callable, expressions: t.Dict 6181 ) -> t.Optional[exp.Expression]: 6182 this = parse_method() 6183 6184 while self._match_set(expressions): 6185 this = self.expression( 6186 expressions[self._prev.token_type], 6187 this=this, 6188 comments=self._prev_comments, 6189 expression=parse_method(), 6190 ) 6191 6192 return this 6193 6194 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6195 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6196 6197 def _parse_wrapped_csv( 6198 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6199 ) -> t.List[exp.Expression]: 6200 return self._parse_wrapped( 6201 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6202 ) 6203 6204 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6205 wrapped = self._match(TokenType.L_PAREN) 6206 if not wrapped and not optional: 6207 self.raise_error("Expecting (") 6208 parse_result = parse_method() 6209 if wrapped: 6210 self._match_r_paren() 6211 return parse_result 6212 6213 def _parse_expressions(self) -> t.List[exp.Expression]: 6214 return self._parse_csv(self._parse_expression) 6215 6216 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6217 return self._parse_select() or self._parse_set_operations( 6218 self._parse_expression() if alias else self._parse_assignment() 6219 ) 6220 6221 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6222 return self._parse_query_modifiers( 6223 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6224 ) 6225 6226 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6227 this = None 6228 if self._match_texts(self.TRANSACTION_KIND): 6229 this = self._prev.text 6230 6231 self._match_texts(("TRANSACTION", "WORK")) 6232 6233 modes = [] 6234 while True: 6235 mode = [] 6236 while self._match(TokenType.VAR): 6237 mode.append(self._prev.text) 6238 6239 if mode: 6240 modes.append(" ".join(mode)) 6241 if not self._match(TokenType.COMMA): 6242 break 6243 6244 return self.expression(exp.Transaction, this=this, modes=modes) 6245 6246 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6247 chain = None 6248 savepoint = None 6249 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6250 6251 self._match_texts(("TRANSACTION", "WORK")) 6252 6253 if self._match_text_seq("TO"): 6254 self._match_text_seq("SAVEPOINT") 6255 savepoint = self._parse_id_var() 6256 6257 if self._match(TokenType.AND): 6258 chain = not self._match_text_seq("NO") 6259 self._match_text_seq("CHAIN") 6260 6261 if is_rollback: 6262 return self.expression(exp.Rollback, savepoint=savepoint) 6263 6264 return self.expression(exp.Commit, chain=chain) 6265 6266 def _parse_refresh(self) -> exp.Refresh: 6267 self._match(TokenType.TABLE) 6268 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6269 6270 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6271 if not self._match_text_seq("ADD"): 6272 return None 6273 6274 self._match(TokenType.COLUMN) 6275 exists_column = self._parse_exists(not_=True) 6276 expression = self._parse_field_def() 6277 6278 if expression: 6279 expression.set("exists", exists_column) 6280 6281 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6282 if self._match_texts(("FIRST", "AFTER")): 6283 position = self._prev.text 6284 column_position = self.expression( 6285 exp.ColumnPosition, this=self._parse_column(), position=position 6286 ) 6287 expression.set("position", column_position) 6288 6289 return expression 6290 6291 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6292 drop = self._match(TokenType.DROP) and self._parse_drop() 6293 if drop and not isinstance(drop, exp.Command): 6294 drop.set("kind", drop.args.get("kind", "COLUMN")) 6295 return drop 6296 6297 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6298 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6299 return self.expression( 6300 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6301 ) 6302 6303 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6304 index = self._index - 1 6305 6306 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6307 return self._parse_csv( 6308 lambda: self.expression( 6309 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6310 ) 6311 ) 6312 6313 self._retreat(index) 6314 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6315 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6316 6317 if self._match_text_seq("ADD", "COLUMNS"): 6318 schema = self._parse_schema() 6319 if schema: 6320 return [schema] 6321 return [] 6322 6323 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6324 6325 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6326 if self._match_texts(self.ALTER_ALTER_PARSERS): 6327 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6328 6329 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6330 # keyword after ALTER we default to parsing this statement 6331 self._match(TokenType.COLUMN) 6332 column = self._parse_field(any_token=True) 6333 6334 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6335 return self.expression(exp.AlterColumn, this=column, drop=True) 6336 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6337 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6338 if self._match(TokenType.COMMENT): 6339 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6340 if self._match_text_seq("DROP", "NOT", "NULL"): 6341 return self.expression( 6342 exp.AlterColumn, 6343 this=column, 6344 drop=True, 6345 allow_null=True, 6346 ) 6347 if self._match_text_seq("SET", "NOT", "NULL"): 6348 return self.expression( 6349 exp.AlterColumn, 6350 this=column, 6351 allow_null=False, 6352 ) 6353 self._match_text_seq("SET", "DATA") 6354 self._match_text_seq("TYPE") 6355 return self.expression( 6356 exp.AlterColumn, 6357 this=column, 6358 dtype=self._parse_types(), 6359 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6360 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6361 ) 6362 6363 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6364 if self._match_texts(("ALL", "EVEN", "AUTO")): 6365 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6366 6367 self._match_text_seq("KEY", "DISTKEY") 6368 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6369 6370 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6371 if compound: 6372 self._match_text_seq("SORTKEY") 6373 6374 if self._match(TokenType.L_PAREN, advance=False): 6375 return self.expression( 6376 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6377 ) 6378 6379 self._match_texts(("AUTO", "NONE")) 6380 return self.expression( 6381 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6382 ) 6383 6384 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6385 index = self._index - 1 6386 6387 partition_exists = self._parse_exists() 6388 if self._match(TokenType.PARTITION, advance=False): 6389 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6390 6391 self._retreat(index) 6392 return self._parse_csv(self._parse_drop_column) 6393 6394 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6395 if self._match(TokenType.COLUMN): 6396 exists = self._parse_exists() 6397 old_column = self._parse_column() 6398 to = self._match_text_seq("TO") 6399 new_column = self._parse_column() 6400 6401 if old_column is None or to is None or new_column is None: 6402 return None 6403 6404 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6405 6406 self._match_text_seq("TO") 6407 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6408 6409 def _parse_alter_table_set(self) -> exp.AlterSet: 6410 alter_set = self.expression(exp.AlterSet) 6411 6412 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6413 "TABLE", "PROPERTIES" 6414 ): 6415 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6416 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6417 alter_set.set("expressions", [self._parse_assignment()]) 6418 elif self._match_texts(("LOGGED", "UNLOGGED")): 6419 alter_set.set("option", exp.var(self._prev.text.upper())) 6420 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6421 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6422 elif self._match_text_seq("LOCATION"): 6423 alter_set.set("location", self._parse_field()) 6424 elif self._match_text_seq("ACCESS", "METHOD"): 6425 alter_set.set("access_method", self._parse_field()) 6426 elif self._match_text_seq("TABLESPACE"): 6427 alter_set.set("tablespace", self._parse_field()) 6428 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6429 alter_set.set("file_format", [self._parse_field()]) 6430 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6431 alter_set.set("file_format", self._parse_wrapped_options()) 6432 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6433 alter_set.set("copy_options", self._parse_wrapped_options()) 6434 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6435 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6436 else: 6437 if self._match_text_seq("SERDE"): 6438 alter_set.set("serde", self._parse_field()) 6439 6440 alter_set.set("expressions", [self._parse_properties()]) 6441 6442 return 
alter_set 6443 6444 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6445 start = self._prev 6446 6447 if not self._match(TokenType.TABLE): 6448 return self._parse_as_command(start) 6449 6450 exists = self._parse_exists() 6451 only = self._match_text_seq("ONLY") 6452 this = self._parse_table(schema=True) 6453 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6454 6455 if self._next: 6456 self._advance() 6457 6458 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6459 if parser: 6460 actions = ensure_list(parser(self)) 6461 options = self._parse_csv(self._parse_property) 6462 6463 if not self._curr and actions: 6464 return self.expression( 6465 exp.AlterTable, 6466 this=this, 6467 exists=exists, 6468 actions=actions, 6469 only=only, 6470 options=options, 6471 cluster=cluster, 6472 ) 6473 6474 return self._parse_as_command(start) 6475 6476 def _parse_merge(self) -> exp.Merge: 6477 self._match(TokenType.INTO) 6478 target = self._parse_table() 6479 6480 if target and self._match(TokenType.ALIAS, advance=False): 6481 target.set("alias", self._parse_table_alias()) 6482 6483 self._match(TokenType.USING) 6484 using = self._parse_table() 6485 6486 self._match(TokenType.ON) 6487 on = self._parse_assignment() 6488 6489 return self.expression( 6490 exp.Merge, 6491 this=target, 6492 using=using, 6493 on=on, 6494 expressions=self._parse_when_matched(), 6495 ) 6496 6497 def _parse_when_matched(self) -> t.List[exp.When]: 6498 whens = [] 6499 6500 while self._match(TokenType.WHEN): 6501 matched = not self._match(TokenType.NOT) 6502 self._match_text_seq("MATCHED") 6503 source = ( 6504 False 6505 if self._match_text_seq("BY", "TARGET") 6506 else self._match_text_seq("BY", "SOURCE") 6507 ) 6508 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6509 6510 self._match(TokenType.THEN) 6511 6512 if self._match(TokenType.INSERT): 6513 _this = self._parse_star() 6514 if _this: 6515 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6516 else: 6517 then = self.expression( 6518 exp.Insert, 6519 this=self._parse_value(), 6520 expression=self._match_text_seq("VALUES") and self._parse_value(), 6521 ) 6522 elif self._match(TokenType.UPDATE): 6523 expressions = self._parse_star() 6524 if expressions: 6525 then = self.expression(exp.Update, expressions=expressions) 6526 else: 6527 then = self.expression( 6528 exp.Update, 6529 expressions=self._match(TokenType.SET) 6530 and self._parse_csv(self._parse_equality), 6531 ) 6532 elif self._match(TokenType.DELETE): 6533 then = self.expression(exp.Var, this=self._prev.text) 6534 else: 6535 then = None 6536 6537 whens.append( 6538 self.expression( 6539 exp.When, 6540 matched=matched, 6541 source=source, 6542 condition=condition, 6543 then=then, 6544 ) 6545 ) 6546 return whens 6547 6548 def _parse_show(self) -> t.Optional[exp.Expression]: 6549 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6550 if parser: 6551 return parser(self) 6552 return self._parse_as_command(self._prev) 6553 6554 def _parse_set_item_assignment( 6555 self, kind: t.Optional[str] = None 6556 ) -> t.Optional[exp.Expression]: 6557 index = self._index 6558 6559 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6560 return self._parse_set_transaction(global_=kind == "GLOBAL") 6561 6562 left = self._parse_primary() or self._parse_column() 6563 assignment_delimiter = self._match_texts(("=", "TO")) 6564 6565 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not 
assignment_delimiter): 6566 self._retreat(index) 6567 return None 6568 6569 right = self._parse_statement() or self._parse_id_var() 6570 if isinstance(right, (exp.Column, exp.Identifier)): 6571 right = exp.var(right.name) 6572 6573 this = self.expression(exp.EQ, this=left, expression=right) 6574 return self.expression(exp.SetItem, this=this, kind=kind) 6575 6576 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6577 self._match_text_seq("TRANSACTION") 6578 characteristics = self._parse_csv( 6579 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6580 ) 6581 return self.expression( 6582 exp.SetItem, 6583 expressions=characteristics, 6584 kind="TRANSACTION", 6585 **{"global": global_}, # type: ignore 6586 ) 6587 6588 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6589 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6590 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6591 6592 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6593 index = self._index 6594 set_ = self.expression( 6595 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6596 ) 6597 6598 if self._curr: 6599 self._retreat(index) 6600 return self._parse_as_command(self._prev) 6601 6602 return set_ 6603 6604 def _parse_var_from_options( 6605 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6606 ) -> t.Optional[exp.Var]: 6607 start = self._curr 6608 if not start: 6609 return None 6610 6611 option = start.text.upper() 6612 continuations = options.get(option) 6613 6614 index = self._index 6615 self._advance() 6616 for keywords in continuations or []: 6617 if isinstance(keywords, str): 6618 keywords = (keywords,) 6619 6620 if self._match_text_seq(*keywords): 6621 option = f"{option} {' '.join(keywords)}" 6622 break 6623 else: 6624 if continuations or continuations is None: 6625 if raise_unmatched: 6626 self.raise_error(f"Unknown option {option}") 6627 6628 self._retreat(index) 6629 return None 6630 6631 return exp.var(option) 6632 6633 def _parse_as_command(self, start: Token) -> exp.Command: 6634 while self._curr: 6635 self._advance() 6636 text = self._find_sql(start, self._prev) 6637 size = len(start.text) 6638 self._warn_unsupported() 6639 return exp.Command(this=text[:size], expression=text[size:]) 6640 6641 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6642 settings = [] 6643 6644 self._match_l_paren() 6645 kind = self._parse_id_var() 6646 6647 if self._match(TokenType.L_PAREN): 6648 while True: 6649 key = self._parse_id_var() 6650 value = self._parse_primary() 6651 6652 if not key and value is None: 6653 break 6654 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6655 self._match(TokenType.R_PAREN) 6656 6657 self._match_r_paren() 6658 6659 return self.expression( 6660 exp.DictProperty, 6661 this=this, 6662 kind=kind.this if kind else None, 6663 settings=settings, 6664 ) 6665 6666 def _parse_dict_range(self, this: str) -> exp.DictRange: 6667 self._match_l_paren() 6668 has_min = self._match_text_seq("MIN") 6669 if has_min: 6670 min = self._parse_var() or self._parse_primary() 6671 self._match_text_seq("MAX") 6672 max = self._parse_var() or self._parse_primary() 6673 else: 6674 max = self._parse_var() or self._parse_primary() 6675 min = exp.Literal.number(0) 6676 self._match_r_paren() 6677 return self.expression(exp.DictRange, this=this, min=min, max=max) 6678 6679 def _parse_comprehension( 6680 self, this: 
t.Optional[exp.Expression] 6681 ) -> t.Optional[exp.Comprehension]: 6682 index = self._index 6683 expression = self._parse_column() 6684 if not self._match(TokenType.IN): 6685 self._retreat(index - 1) 6686 return None 6687 iterator = self._parse_column() 6688 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6689 return self.expression( 6690 exp.Comprehension, 6691 this=this, 6692 expression=expression, 6693 iterator=iterator, 6694 condition=condition, 6695 ) 6696 6697 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6698 if self._match(TokenType.HEREDOC_STRING): 6699 return self.expression(exp.Heredoc, this=self._prev.text) 6700 6701 if not self._match_text_seq("$"): 6702 return None 6703 6704 tags = ["$"] 6705 tag_text = None 6706 6707 if self._is_connected(): 6708 self._advance() 6709 tags.append(self._prev.text.upper()) 6710 else: 6711 self.raise_error("No closing $ found") 6712 6713 if tags[-1] != "$": 6714 if self._is_connected() and self._match_text_seq("$"): 6715 tag_text = tags[-1] 6716 tags.append("$") 6717 else: 6718 self.raise_error("No closing $ found") 6719 6720 heredoc_start = self._curr 6721 6722 while self._curr: 6723 if self._match_text_seq(*tags, advance=False): 6724 this = self._find_sql(heredoc_start, self._prev) 6725 self._advance(len(tags)) 6726 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6727 6728 self._advance() 6729 6730 self.raise_error(f"No closing {''.join(tags)} found") 6731 return None 6732 6733 def _find_parser( 6734 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6735 ) -> t.Optional[t.Callable]: 6736 if not self._curr: 6737 return None 6738 6739 index = self._index 6740 this = [] 6741 while True: 6742 # The current token might be multiple words 6743 curr = self._curr.text.upper() 6744 key = curr.split(" ") 6745 this.append(curr) 6746 6747 self._advance() 6748 result, trie = in_trie(trie, key) 6749 if result == TrieResult.FAILED: 6750 break 6751 6752 if result == TrieResult.EXISTS: 6753 subparser = parsers[" ".join(this)] 6754 return subparser 6755 6756 self._retreat(index) 6757 return None 6758 6759 def _match(self, token_type, advance=True, expression=None): 6760 if not self._curr: 6761 return None 6762 6763 if self._curr.token_type == token_type: 6764 if advance: 6765 self._advance() 6766 self._add_comments(expression) 6767 return True 6768 6769 return None 6770 6771 def _match_set(self, types, advance=True): 6772 if not self._curr: 6773 return None 6774 6775 if self._curr.token_type in types: 6776 if advance: 6777 self._advance() 6778 return True 6779 6780 return None 6781 6782 def _match_pair(self, token_type_a, token_type_b, advance=True): 6783 if not self._curr or not self._next: 6784 return None 6785 6786 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6787 if advance: 6788 self._advance(2) 6789 return True 6790 6791 return None 6792 6793 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6794 if not self._match(TokenType.L_PAREN, expression=expression): 6795 self.raise_error("Expecting (") 6796 6797 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6798 if not self._match(TokenType.R_PAREN, expression=expression): 6799 self.raise_error("Expecting )") 6800 6801 def _match_texts(self, texts, advance=True): 6802 if self._curr and self._curr.text.upper() in texts: 6803 if advance: 6804 self._advance() 6805 return True 6806 return None 6807 6808 def _match_text_seq(self, *texts, advance=True): 6809 index = 
self._index 6810 for text in texts: 6811 if self._curr and self._curr.text.upper() == text: 6812 self._advance() 6813 else: 6814 self._retreat(index) 6815 return None 6816 6817 if not advance: 6818 self._retreat(index) 6819 6820 return True 6821 6822 def _replace_lambda( 6823 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6824 ) -> t.Optional[exp.Expression]: 6825 if not node: 6826 return node 6827 6828 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6829 6830 for column in node.find_all(exp.Column): 6831 typ = lambda_types.get(column.parts[0].name) 6832 if typ is not None: 6833 dot_or_id = column.to_dot() if column.table else column.this 6834 6835 if typ: 6836 dot_or_id = self.expression( 6837 exp.Cast, 6838 this=dot_or_id, 6839 to=typ, 6840 ) 6841 6842 parent = column.parent 6843 6844 while isinstance(parent, exp.Dot): 6845 if not isinstance(parent.parent, exp.Dot): 6846 parent.replace(dot_or_id) 6847 break 6848 parent = parent.parent 6849 else: 6850 if column is node: 6851 node = dot_or_id 6852 else: 6853 column.replace(dot_or_id) 6854 return node 6855 6856 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6857 start = self._prev 6858 6859 # Not to be confused with TRUNCATE(number, decimals) function call 6860 if self._match(TokenType.L_PAREN): 6861 self._retreat(self._index - 2) 6862 return self._parse_function() 6863 6864 # Clickhouse supports TRUNCATE DATABASE as well 6865 is_database = self._match(TokenType.DATABASE) 6866 6867 self._match(TokenType.TABLE) 6868 6869 exists = self._parse_exists(not_=False) 6870 6871 expressions = self._parse_csv( 6872 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6873 ) 6874 6875 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6876 6877 if self._match_text_seq("RESTART", "IDENTITY"): 6878 identity = "RESTART" 6879 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6880 identity = "CONTINUE" 6881 else: 6882 identity = None 6883 6884 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6885 option = self._prev.text 6886 else: 6887 option = None 6888 6889 partition = self._parse_partition() 6890 6891 # Fallback case 6892 if self._curr: 6893 return self._parse_as_command(start) 6894 6895 return self.expression( 6896 exp.TruncateTable, 6897 expressions=expressions, 6898 is_database=is_database, 6899 exists=exists, 6900 cluster=cluster, 6901 identity=identity, 6902 option=option, 6903 partition=partition, 6904 ) 6905 6906 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6907 this = self._parse_ordered(self._parse_opclass) 6908 6909 if not self._match(TokenType.WITH): 6910 return this 6911 6912 op = self._parse_var(any_token=True) 6913 6914 return self.expression(exp.WithOperator, this=this, op=op) 6915 6916 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6917 self._match(TokenType.EQ) 6918 self._match(TokenType.L_PAREN) 6919 6920 opts: t.List[t.Optional[exp.Expression]] = [] 6921 while self._curr and not self._match(TokenType.R_PAREN): 6922 if self._match_text_seq("FORMAT_NAME", "="): 6923 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6924 # so we parse it separately to use _parse_field() 6925 prop = self.expression( 6926 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6927 ) 6928 opts.append(prop) 6929 else: 6930 opts.append(self._parse_property()) 6931 6932 self._match(TokenType.COMMA) 6933 6934 return opts 6935 6936 def 
_parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6937 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6938 6939 options = [] 6940 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6941 option = self._parse_var(any_token=True) 6942 prev = self._prev.text.upper() 6943 6944 # Different dialects might separate options and values by white space, "=" and "AS" 6945 self._match(TokenType.EQ) 6946 self._match(TokenType.ALIAS) 6947 6948 param = self.expression(exp.CopyParameter, this=option) 6949 6950 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6951 TokenType.L_PAREN, advance=False 6952 ): 6953 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6954 param.set("expressions", self._parse_wrapped_options()) 6955 elif prev == "FILE_FORMAT": 6956 # T-SQL's external file format case 6957 param.set("expression", self._parse_field()) 6958 else: 6959 param.set("expression", self._parse_unquoted_field()) 6960 6961 options.append(param) 6962 self._match(sep) 6963 6964 return options 6965 6966 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6967 expr = self.expression(exp.Credentials) 6968 6969 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6970 expr.set("storage", self._parse_field()) 6971 if self._match_text_seq("CREDENTIALS"): 6972 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6973 creds = ( 6974 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6975 ) 6976 expr.set("credentials", creds) 6977 if self._match_text_seq("ENCRYPTION"): 6978 expr.set("encryption", self._parse_wrapped_options()) 6979 if self._match_text_seq("IAM_ROLE"): 6980 expr.set("iam_role", self._parse_field()) 6981 if self._match_text_seq("REGION"): 6982 expr.set("region", self._parse_field()) 6983 6984 return expr 6985 6986 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6987 return self._parse_field() 6988 6989 def _parse_copy(self) -> exp.Copy | exp.Command: 6990 start = self._prev 6991 6992 self._match(TokenType.INTO) 6993 6994 this = ( 6995 self._parse_select(nested=True, parse_subquery_alias=False) 6996 if self._match(TokenType.L_PAREN, advance=False) 6997 else self._parse_table(schema=True) 6998 ) 6999 7000 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7001 7002 files = self._parse_csv(self._parse_file_location) 7003 credentials = self._parse_credentials() 7004 7005 self._match_text_seq("WITH") 7006 7007 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7008 7009 # Fallback case 7010 if self._curr: 7011 return self._parse_as_command(start) 7012 7013 return self.expression( 7014 exp.Copy, 7015 this=this, 7016 kind=kind, 7017 credentials=credentials, 7018 files=files, 7019 params=params, 7020 )
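The DECODE-to-CASE lowering performed by _parse_decode above can be observed end to end through the public API. A minimal sketch (the query is illustrative, and the exact rendered output may vary slightly across sqlglot versions):

import sqlglot

# DECODE(expr, search, result, ..., default) has no generic-SQL equivalent,
# so the parser lowers it into a CASE expression; a NULL search becomes an
# explicit IS NULL check, as noted in the _parse_decode docstring.
tree = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', NULL, 'missing', 'other') FROM t")
print(tree.sql())
# e.g. SELECT CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END FROM t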
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
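A minimal construction sketch for the options above (the dialect name is just an example):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# IMMEDIATE raises on the first error, WARN logs errors and keeps going,
# RAISE collects up to max_errors and raises a single ParseError at the end
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")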
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
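For example, pairing the Parser with the Tokenizer (the base dialect is assumed here):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT 1"
tokens = Tokenizer().tokenize(sql)

# One tree per statement; `sql` is only used to enrich error messages
for tree in Parser().parse(tokens, sql):
    print(tree.sql())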
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
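A sketch of parsing a bare fragment into a specific node type, assuming exp.Where is registered in EXPRESSION_PARSERS (it is in current sqlglot):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "WHERE x > 1"
where = Parser().parse_into(exp.Where, Tokenizer().tokenize(sql), sql)[0]
print(type(where))  # <class 'sqlglot.expressions.Where'>

# If none of the given types parse, a ParseError is raised and each
# collected error records its `into_expression`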
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
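A sketch of the WARN path, where errors are logged through this module's "sqlglot" logger rather than raised:

import logging
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

logging.basicConfig()

sql = "SELECT foo FROM (SELECT baz FROM t"  # unbalanced parenthesis
parser = Parser(error_level=ErrorLevel.WARN)

# check_errors() runs at the end of parse(); with WARN it logs each error
# and parse() still returns whatever could be salvaged
trees = parser.parse(Tokenizer().tokenize(sql), sql)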
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
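With the default IMMEDIATE level, the structured fields set above can be inspected on the raised ParseError. A short sketch:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT foo FROM (SELECT baz FROM t"
try:
    Parser().parse(Tokenizer().tokenize(sql), sql)  # IMMEDIATE raises at once
except ParseError as e:
    first = e.errors[0]
    print(first["line"], first["col"], first["highlight"])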
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
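This method is normally called from inside parsing code (as throughout the source above), so the standalone usage below is only a contrived sketch of what it does: build a node, attach any buffered comments, and validate it.

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()

# Builds and validates an exp.Column in one step; with no tokens consumed
# there are no buffered comments to attach
col = parser.expression(exp.Column, this=exp.to_identifier("x"))
print(col.sql())  # x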
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
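A sketch of the failure path, assuming exp.Cast's mandatory `to` argument is left unset (error messages come from Expression.error_messages and are routed through raise_error):

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.IMMEDIATE)
try:
    # exp.Cast declares both `this` and `to` as required
    parser.validate_expression(exp.Cast(this=exp.column("x")))
except ParseError as e:
    print(e.errors[0]["description"])  # reports the missing keyword

With ErrorLevel.IGNORE, validation is skipped entirely and the incomplete expression is returned as-is.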