sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

logger = logging.getLogger("sqlglot")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
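
# --- Illustrative sketch (not part of sqlglot): how the builder helpers above
# behave. `_demo_builders` is a hypothetical function added for documentation
# purposes only; it relies solely on the public `exp` API imported above.
def _demo_builders() -> None:
    # Alternating key/value arguments are split into two parallel arrays.
    mapping = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
    assert isinstance(mapping, exp.VarMap)

    # A single star argument short-circuits into a StarMap instead.
    assert isinstance(build_var_map([exp.Star()]), exp.StarMap)

    # SQLite-style LIKE(pattern, value): the builder swaps the arguments so the
    # resulting node reads `value LIKE pattern`.
    like = build_like([exp.Literal.string("a%"), exp.column("x")])
    assert isinstance(like, exp.Like) and like.this.name == "x"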

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
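
    # Illustrative note (not from the sqlglot source): dialect parsers customize
    # these token sets by shadowing them in a subclass, using the same set
    # arithmetic as the derived sets above, e.g.:
    #
    #   class MyDialectParser(Parser):  # hypothetical
    #       TABLE_ALIAS_TOKENS = Parser.TABLE_ALIAS_TOKENS - {TokenType.WINDOW}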

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
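
    # Illustrative sketch (comment only, not sqlglot code): LAMBDAS and
    # COLUMN_OPERATORS drive postfix parsing. In the default dialect:
    #
    #   x -> x + 1     is built by LAMBDAS[TokenType.ARROW] into exp.Lambda
    #   col::INT       is built by COLUMN_OPERATORS[TokenType.DCOLON] into exp.Cast
    #   col -> '$.a'   is built by COLUMN_OPERATORS[TokenType.ARROW] into exp.JSONExtract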

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
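
    # Illustrative sketch (comment only): _parse_statement (defined further down)
    # dispatches on the first token through this table, e.g.:
    #
    #   Parser().parse(Tokenizer().tokenize("USE db"))[0]
    #   # -> exp.Use(this=exp.Table(...)), via the TokenType.USE entry above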

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
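
    # Illustrative sketch (comment only): RANGE_PARSERS extend an operand that
    # was already parsed, so `x BETWEEN 1 AND 2` becomes
    # exp.Between(this=x, low=1, high=2), and `x LIKE 'a%' ESCAPE '!'` nests the
    # exp.Like inside exp.Escape via binary_range_parser/_parse_escape.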

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
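
    # Illustrative sketch (comment only): these parsers fire on property keywords
    # in DDL, e.g. `CREATE TABLE t (x INT) ENGINE=InnoDB` routes "ENGINE" through
    # _parse_property_assignment, attaching exp.EngineProperty to the resulting
    # exp.Create's "properties" argument.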

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }
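
    # Illustrative sketch (comment only): column constraints chain after a type,
    # so `x INT NOT NULL DEFAULT 1` yields a column definition whose constraints
    # include a not-null constraint (via the "NOT" entry) and
    # exp.DefaultColumnConstraint(this=exp.Literal.number(1)) (via "DEFAULT").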

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
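
    # Illustrative sketch (comment only): once a query is parsed, modifiers are
    # consumed in a loop keyed by this table, so `SELECT * FROM t WHERE x LIMIT 1`
    # sets the "where" and "limit" args on the exp.Select node.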
lambda self: ("windows", self._parse_window_clause()), 905 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 906 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 907 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 908 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 909 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 910 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 911 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 912 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 913 TokenType.CLUSTER_BY: lambda self: ( 914 "cluster", 915 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 916 ), 917 TokenType.DISTRIBUTE_BY: lambda self: ( 918 "distribute", 919 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 920 ), 921 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 922 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 923 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 924 } 925 926 SET_PARSERS = { 927 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 928 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 929 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 930 "TRANSACTION": lambda self: self._parse_set_transaction(), 931 } 932 933 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 934 935 TYPE_LITERAL_PARSERS = { 936 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 937 } 938 939 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 940 941 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 942 943 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 944 945 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 946 TRANSACTION_CHARACTERISTICS = { 947 "ISOLATION LEVEL REPEATABLE READ", 948 "ISOLATION LEVEL READ COMMITTED", 949 "ISOLATION LEVEL READ UNCOMMITTED", 950 "ISOLATION LEVEL SERIALIZABLE", 951 "READ WRITE", 952 "READ ONLY", 953 } 954 955 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 956 957 CLONE_KEYWORDS = {"CLONE", "COPY"} 958 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 959 960 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 961 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 962 963 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 964 965 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 966 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 967 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 968 969 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 970 971 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 972 973 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 974 975 DISTINCT_TOKENS = {TokenType.DISTINCT} 976 977 NULL_TOKENS = {TokenType.NULL} 978 979 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 980 981 STRICT_CAST = True 982 983 PREFIXED_PIVOT_COLUMNS = False 984 IDENTIFY_PIVOT_STRINGS = False 985 986 LOG_DEFAULTS_TO_LN = False 987 988 # Whether ADD is present for each column added by ALTER TABLE 989 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 990 991 # Whether the table sample clause expects CSV syntax 992 TABLESAMPLE_CSV = False 993 994 # Whether the SET 

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
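
    # Illustrative usage (comment only), mirroring what sqlglot.parse() does
    # under the hood:
    #
    #   sql = "SELECT 1; SELECT 2"
    #   expressions = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    #   # -> one exp.Select tree per semicolon-separated statement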

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
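
    # Illustrative sketch (comment only): error handling depends on error_level.
    # IMMEDIATE (the default) raises at the first problem, RAISE accumulates
    # errors and check_errors raises one combined ParseError, and WARN only logs:
    #
    #   Parser(error_level=ErrorLevel.WARN).parse(Tokenizer().tokenize("SELECT 1 +"))
    #   # logs the error and returns a best-effort tree instead of raising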

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)
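
    # Illustrative sketch (comment only): parsers speculate and backtrack with
    # the token cursor, a pattern used throughout this class (see e.g.
    # _parse_property further down):
    #
    #   index = self._index
    #   ...attempt to parse an optional construct...
    #   if not matched:
    #       self._retreat(index)  # rewind _curr/_next/_prev to the saved spot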

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
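
    # Illustrative sketch (comment only): statement dispatch in practice:
    #
    #   Parser().parse(Tokenizer().tokenize("DROP TABLE t"))[0]   # -> exp.Drop
    #
    # Statements matched by Tokenizer.COMMANDS but not by STATEMENT_PARSERS fall
    # back to _parse_command, which wraps the remaining text in exp.Command.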

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            ),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
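
    # Illustrative sketch (comment only): _parse_exists powers IF [NOT] EXISTS,
    # so `DROP TABLE IF EXISTS t` sets exists=True on the exp.Drop above, and
    # `CREATE TABLE IF NOT EXISTS t (x INT)` sets exists=True on exp.Create.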

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )
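
    # Illustrative sketch (comment only): the generic fallback at the end of
    # _parse_property covers key = value pairs with no dedicated entry in
    # PROPERTY_PARSERS, producing exp.Property(this=<key>, value=<value>).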

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
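
    # Illustrative sketch (comment only): per the MySQL docs linked above,
    # `CREATE DEFINER = alice@localhost VIEW v AS SELECT 1` would yield
    # exp.DefinerProperty(this="alice@localhost") (identifiers assumed here
    # for illustration).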

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
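
    # Editor's sketch, not part of the original source: _parse_partition_bound_spec covers
    # the Postgres bound forms FOR VALUES IN (...), FOR VALUES FROM (...) TO (...) and
    # FOR VALUES WITH (MODULUS m, REMAINDER r). Assuming a standard sqlglot install:
    #
    #   >>> import sqlglot
    #   >>> sql = "CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)"
    #   >>> sqlglot.parse_one(sql, read="postgres")  # exp.Create carrying a PartitionedOfProperty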

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
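
    # Editor's sketch, not part of the original source: _parse_insert yields an exp.Insert
    # whose "this" is the target table/schema and whose "expression" is the VALUES list or
    # the SELECT that follows. Assuming a standard sqlglot install:
    #
    #   >>> import sqlglot
    #   >>> node = sqlglot.parse_one("INSERT INTO t (a, b) VALUES (1, 2)")
    #   >>> type(node).__name__
    #   'Insert'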

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
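
    # Editor's sketch, not part of the original source: _parse_on_conflict is reached from
    # _parse_insert, so upsert clauses end up under the Insert's "conflict" arg:
    #
    #   >>> import sqlglot
    #   >>> sql = "INSERT INTO t VALUES (1) ON CONFLICT DO NOTHING"
    #   >>> sqlglot.parse_one(sql, read="postgres").args["conflict"]  # exp.OnConflict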

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )
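
    # Editor's sketch, not part of the original source: DELETE and UPDATE parse into
    # exp.Delete / exp.Update, with WHERE, RETURNING, ORDER and LIMIT as optional args:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("DELETE FROM t WHERE a = 1")       # exp.Delete
    #   >>> sqlglot.parse_one("UPDATE t SET a = 2 WHERE b = 3")  # exp.Update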

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this
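
    # Editor's sketch, not part of the original source: the from_-first branch above is
    # what makes DuckDB's leading-FROM syntax work. Assuming a standard sqlglot install:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("FROM x", read="duckdb").sql()
    #   'SELECT * FROM x'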

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )
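
    # Editor's sketch, not part of the original source: _parse_with/_parse_cte attach the
    # WITH clause to the statement that follows it (see _parse_select above):
    #
    #   >>> import sqlglot
    #   >>> q = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
    #   >>> q.args["with"]  # exp.With containing one exp.CTE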

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this
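
    # Editor's sketch, not part of the original source: _parse_join records the join
    # method/side/kind as plain text args on exp.Join:
    #
    #   >>> import sqlglot
    #   >>> q = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
    #   >>> join = q.args["joins"][0]
    #   >>> join.args.get("side"), join.args.get("on") is not None
    #   ('LEFT', True)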

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            include=include,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
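
    # Editor's sketch, not part of the original source: _parse_table_parts splits dotted
    # names into catalog/db/table:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #   >>> tbl.catalog, tbl.db, tbl.name
    #   ('c', 'd', 't')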

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None
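
    # Editor's sketch, not part of the original source: _parse_derived_table_values handles
    # VALUES used as a relation, including the derived "(VALUES ...)" form:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> q = sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS t(x)")
    #   >>> q.find(exp.Values).alias
    #   't'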

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None
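
    # Editor's sketch, not part of the original source: _parse_group accumulates plain
    # expressions, GROUPING SETS, ROLLUP and CUBE into one exp.Group node:
    #
    #   >>> import sqlglot
    #   >>> g = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)")
    #   >>> g.args["group"].args["rollup"]  # the ROLLUP column list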

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
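
    # Editor's sketch, not part of the original source: the COMMA branch above is what
    # understands MySQL's "LIMIT offset, count" form, so it should transpile into the
    # standard LIMIT/OFFSET shape. Assuming a standard sqlglot install:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT x FROM t LIMIT 5, 10", read="mysql", write="postgres")[0]
    #   # roughly: 'SELECT x FROM t LIMIT 10 OFFSET 5'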

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
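
    # Editor's sketch, not part of the original source: per the distinct computation above,
    # UNION ALL parses with distinct=False while bare UNION parses with distinct=True:
    #
    #   >>> import sqlglot
    #   >>> u = sqlglot.parse_one("SELECT a FROM x UNION ALL SELECT a FROM y")
    #   >>> type(u).__name__, u.args["distinct"]
    #   ('Union', False)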
3510 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3511 if self._match(TokenType.NOTNULL): 3512 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3513 this = self.expression(exp.Not, this=this) 3514 3515 if negate: 3516 this = self.expression(exp.Not, this=this) 3517 3518 if self._match(TokenType.IS): 3519 this = self._parse_is(this) 3520 3521 return this 3522 3523 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3524 index = self._index - 1 3525 negate = self._match(TokenType.NOT) 3526 3527 if self._match_text_seq("DISTINCT", "FROM"): 3528 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3529 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3530 3531 expression = self._parse_null() or self._parse_boolean() 3532 if not expression: 3533 self._retreat(index) 3534 return None 3535 3536 this = self.expression(exp.Is, this=this, expression=expression) 3537 return self.expression(exp.Not, this=this) if negate else this 3538 3539 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3540 unnest = self._parse_unnest(with_alias=False) 3541 if unnest: 3542 this = self.expression(exp.In, this=this, unnest=unnest) 3543 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3544 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3545 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3546 3547 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3548 this = self.expression(exp.In, this=this, query=expressions[0]) 3549 else: 3550 this = self.expression(exp.In, this=this, expressions=expressions) 3551 3552 if matched_l_paren: 3553 self._match_r_paren(this) 3554 elif not self._match(TokenType.R_BRACKET, expression=this): 3555 self.raise_error("Expecting ]") 3556 else: 3557 this = self.expression(exp.In, this=this, field=self._parse_field()) 3558 3559 return this 3560 3561 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3562 low = self._parse_bitwise() 3563 self._match(TokenType.AND) 3564 high = self._parse_bitwise() 3565 return self.expression(exp.Between, this=this, low=low, high=high) 3566 3567 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3568 if not self._match(TokenType.ESCAPE): 3569 return this 3570 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3571 3572 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3573 index = self._index 3574 3575 if not self._match(TokenType.INTERVAL) and match_interval: 3576 return None 3577 3578 if self._match(TokenType.STRING, advance=False): 3579 this = self._parse_primary() 3580 else: 3581 this = self._parse_term() 3582 3583 if not this or ( 3584 isinstance(this, exp.Column) 3585 and not this.table 3586 and not this.this.quoted 3587 and this.name.upper() == "IS" 3588 ): 3589 self._retreat(index) 3590 return None 3591 3592 unit = self._parse_function() or ( 3593 not self._match(TokenType.ALIAS, advance=False) 3594 and self._parse_var(any_token=True, upper=True) 3595 ) 3596 3597 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3598 # each INTERVAL expression into this canonical form so it's easy to transpile 3599 if this and this.is_number: 3600 this = exp.Literal.string(this.name) 3601 elif this and this.is_string: 3602 parts = this.name.split() 3603 3604 if len(parts) == 2: 3605 if unit: 3606 # This 
is not actually a unit, it's something else (e.g. a "window side") 3607 unit = None 3608 self._retreat(self._index - 1) 3609 3610 this = exp.Literal.string(parts[0]) 3611 unit = self.expression(exp.Var, this=parts[1].upper()) 3612 3613 return self.expression(exp.Interval, this=this, unit=unit) 3614 3615 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3616 this = self._parse_term() 3617 3618 while True: 3619 if self._match_set(self.BITWISE): 3620 this = self.expression( 3621 self.BITWISE[self._prev.token_type], 3622 this=this, 3623 expression=self._parse_term(), 3624 ) 3625 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3626 this = self.expression( 3627 exp.DPipe, 3628 this=this, 3629 expression=self._parse_term(), 3630 safe=not self.dialect.STRICT_STRING_CONCAT, 3631 ) 3632 elif self._match(TokenType.DQMARK): 3633 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3634 elif self._match_pair(TokenType.LT, TokenType.LT): 3635 this = self.expression( 3636 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3637 ) 3638 elif self._match_pair(TokenType.GT, TokenType.GT): 3639 this = self.expression( 3640 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3641 ) 3642 else: 3643 break 3644 3645 return this 3646 3647 def _parse_term(self) -> t.Optional[exp.Expression]: 3648 return self._parse_tokens(self._parse_factor, self.TERM) 3649 3650 def _parse_factor(self) -> t.Optional[exp.Expression]: 3651 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3652 this = parse_method() 3653 3654 while self._match_set(self.FACTOR): 3655 this = self.expression( 3656 self.FACTOR[self._prev.token_type], 3657 this=this, 3658 comments=self._prev_comments, 3659 expression=parse_method(), 3660 ) 3661 if isinstance(this, exp.Div): 3662 this.args["typed"] = self.dialect.TYPED_DIVISION 3663 this.args["safe"] = self.dialect.SAFE_DIVISION 3664 3665 return this 3666 3667 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3668 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3669 3670 def _parse_unary(self) -> t.Optional[exp.Expression]: 3671 if self._match_set(self.UNARY_PARSERS): 3672 return self.UNARY_PARSERS[self._prev.token_type](self) 3673 return self._parse_at_time_zone(self._parse_type()) 3674 3675 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3676 interval = parse_interval and self._parse_interval() 3677 if interval: 3678 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 3679 while True: 3680 index = self._index 3681 self._match(TokenType.PLUS) 3682 3683 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3684 self._retreat(index) 3685 break 3686 3687 interval = self.expression( # type: ignore 3688 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3689 ) 3690 3691 return interval 3692 3693 index = self._index 3694 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3695 this = self._parse_column() 3696 3697 if data_type: 3698 if isinstance(this, exp.Literal): 3699 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3700 if parser: 3701 return parser(self, this, data_type) 3702 return self.expression(exp.Cast, this=this, to=data_type) 3703 if not data_type.expressions: 3704 self._retreat(index) 3705 return self._parse_column() 3706 return self._parse_column_ops(data_type) 3707 3708 return this and self._parse_column_ops(this) 3709 3710 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3711 this = self._parse_type() 3712 if not this: 3713 return None 3714 3715 return self.expression( 3716 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3717 ) 3718 3719 def _parse_types( 3720 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3721 ) -> t.Optional[exp.Expression]: 3722 index = self._index 3723 3724 prefix = self._match_text_seq("SYSUDTLIB", ".") 3725 3726 if not self._match_set(self.TYPE_TOKENS): 3727 identifier = allow_identifiers and self._parse_id_var( 3728 any_token=False, tokens=(TokenType.VAR,) 3729 ) 3730 if identifier: 3731 tokens = self.dialect.tokenize(identifier.name) 3732 3733 if len(tokens) != 1: 3734 self.raise_error("Unexpected identifier", self._prev) 3735 3736 if tokens[0].token_type in self.TYPE_TOKENS: 3737 self._prev = tokens[0] 3738 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3739 type_name = identifier.name 3740 3741 while self._match(TokenType.DOT): 3742 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3743 3744 return exp.DataType.build(type_name, udt=True) 3745 else: 3746 self._retreat(self._index - 1) 3747 return None 3748 else: 3749 return None 3750 3751 type_token = self._prev.token_type 3752 3753 if type_token == TokenType.PSEUDO_TYPE: 3754 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3755 3756 if type_token == TokenType.OBJECT_IDENTIFIER: 3757 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3758 3759 nested = type_token in self.NESTED_TYPE_TOKENS 3760 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3761 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3762 expressions = None 3763 maybe_func = False 3764 3765 if self._match(TokenType.L_PAREN): 3766 if is_struct: 3767 expressions = self._parse_csv(self._parse_struct_types) 3768 elif nested: 3769 expressions = self._parse_csv( 3770 lambda: self._parse_types( 3771 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3772 ) 3773 ) 3774 elif type_token in self.ENUM_TYPE_TOKENS: 3775 expressions = self._parse_csv(self._parse_equality) 3776 elif is_aggregate: 3777 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3778 any_token=False, tokens=(TokenType.VAR,) 3779 ) 3780 if not func_or_ident or not self._match(TokenType.COMMA): 3781 return None 3782 expressions = self._parse_csv( 3783 lambda: self._parse_types( 3784 check_func=check_func, schema=schema, 
allow_identifiers=allow_identifiers 3785 ) 3786 ) 3787 expressions.insert(0, func_or_ident) 3788 else: 3789 expressions = self._parse_csv(self._parse_type_size) 3790 3791 if not expressions or not self._match(TokenType.R_PAREN): 3792 self._retreat(index) 3793 return None 3794 3795 maybe_func = True 3796 3797 this: t.Optional[exp.Expression] = None 3798 values: t.Optional[t.List[exp.Expression]] = None 3799 3800 if nested and self._match(TokenType.LT): 3801 if is_struct: 3802 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3803 else: 3804 expressions = self._parse_csv( 3805 lambda: self._parse_types( 3806 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3807 ) 3808 ) 3809 3810 if not self._match(TokenType.GT): 3811 self.raise_error("Expecting >") 3812 3813 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3814 values = self._parse_csv(self._parse_conjunction) 3815 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3816 3817 if type_token in self.TIMESTAMPS: 3818 if self._match_text_seq("WITH", "TIME", "ZONE"): 3819 maybe_func = False 3820 tz_type = ( 3821 exp.DataType.Type.TIMETZ 3822 if type_token in self.TIMES 3823 else exp.DataType.Type.TIMESTAMPTZ 3824 ) 3825 this = exp.DataType(this=tz_type, expressions=expressions) 3826 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3827 maybe_func = False 3828 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3829 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3830 maybe_func = False 3831 elif type_token == TokenType.INTERVAL: 3832 unit = self._parse_var() 3833 3834 if self._match_text_seq("TO"): 3835 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3836 else: 3837 span = None 3838 3839 if span or not unit: 3840 this = self.expression( 3841 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3842 ) 3843 else: 3844 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3845 3846 if maybe_func and check_func: 3847 index2 = self._index 3848 peek = self._parse_string() 3849 3850 if not peek: 3851 self._retreat(index) 3852 return None 3853 3854 self._retreat(index2) 3855 3856 if not this: 3857 if self._match_text_seq("UNSIGNED"): 3858 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3859 if not unsigned_type_token: 3860 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3861 3862 type_token = unsigned_type_token or type_token 3863 3864 this = exp.DataType( 3865 this=exp.DataType.Type[type_token.value], 3866 expressions=expressions, 3867 nested=nested, 3868 values=values, 3869 prefix=prefix, 3870 ) 3871 3872 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3873 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3874 3875 return this 3876 3877 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3878 index = self._index 3879 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3880 self._match(TokenType.COLON) 3881 column_def = self._parse_column_def(this) 3882 3883 if type_required and ( 3884 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3885 ): 3886 self._retreat(index) 3887 return self._parse_types() 3888 3889 return column_def 3890 3891 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3892 if not self._match_text_seq("AT", "TIME", "ZONE"): 3893 
return this 3894 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3895 3896 def _parse_column(self) -> t.Optional[exp.Expression]: 3897 this = self._parse_column_reference() 3898 return self._parse_column_ops(this) if this else self._parse_bracket(this) 3899 3900 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 3901 this = self._parse_field() 3902 if ( 3903 not this 3904 and self._match(TokenType.VALUES, advance=False) 3905 and self.VALUES_FOLLOWED_BY_PAREN 3906 and (not self._next or self._next.token_type != TokenType.L_PAREN) 3907 ): 3908 this = self._parse_id_var() 3909 3910 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 3911 3912 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3913 this = self._parse_bracket(this) 3914 3915 while self._match_set(self.COLUMN_OPERATORS): 3916 op_token = self._prev.token_type 3917 op = self.COLUMN_OPERATORS.get(op_token) 3918 3919 if op_token == TokenType.DCOLON: 3920 field = self._parse_types() 3921 if not field: 3922 self.raise_error("Expected type") 3923 elif op and self._curr: 3924 field = self._parse_column_reference() 3925 else: 3926 field = self._parse_field(anonymous_func=True, any_token=True) 3927 3928 if isinstance(field, exp.Func): 3929 # bigquery allows function calls like x.y.count(...) 3930 # SAFE.SUBSTR(...) 3931 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3932 this = self._replace_columns_with_dots(this) 3933 3934 if op: 3935 this = op(self, this, field) 3936 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3937 this = self.expression( 3938 exp.Column, 3939 this=field, 3940 table=this.this, 3941 db=this.args.get("table"), 3942 catalog=this.args.get("db"), 3943 ) 3944 else: 3945 this = self.expression(exp.Dot, this=this, expression=field) 3946 this = self._parse_bracket(this) 3947 return this 3948 3949 def _parse_primary(self) -> t.Optional[exp.Expression]: 3950 if self._match_set(self.PRIMARY_PARSERS): 3951 token_type = self._prev.token_type 3952 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3953 3954 if token_type == TokenType.STRING: 3955 expressions = [primary] 3956 while self._match(TokenType.STRING): 3957 expressions.append(exp.Literal.string(self._prev.text)) 3958 3959 if len(expressions) > 1: 3960 return self.expression(exp.Concat, expressions=expressions) 3961 3962 return primary 3963 3964 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3965 return exp.Literal.number(f"0.{self._prev.text}") 3966 3967 if self._match(TokenType.L_PAREN): 3968 comments = self._prev_comments 3969 query = self._parse_select() 3970 3971 if query: 3972 expressions = [query] 3973 else: 3974 expressions = self._parse_expressions() 3975 3976 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3977 3978 if isinstance(this, exp.Subqueryable): 3979 this = self._parse_set_operations( 3980 self._parse_subquery(this=this, parse_alias=False) 3981 ) 3982 elif len(expressions) > 1: 3983 this = self.expression(exp.Tuple, expressions=expressions) 3984 else: 3985 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3986 3987 if this: 3988 this.add_comments(comments) 3989 3990 self._match_r_paren(expression=this) 3991 return this 3992 3993 return None 3994 3995 def _parse_field( 3996 self, 3997 any_token: bool = False, 3998 tokens: t.Optional[t.Collection[TokenType]] = None, 3999 anonymous_func: bool = False, 4000 ) 
-> t.Optional[exp.Expression]: 4001 return ( 4002 self._parse_primary() 4003 or self._parse_function(anonymous=anonymous_func) 4004 or self._parse_id_var(any_token=any_token, tokens=tokens) 4005 ) 4006 4007 def _parse_function( 4008 self, 4009 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4010 anonymous: bool = False, 4011 optional_parens: bool = True, 4012 ) -> t.Optional[exp.Expression]: 4013 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4014 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4015 fn_syntax = False 4016 if ( 4017 self._match(TokenType.L_BRACE, advance=False) 4018 and self._next 4019 and self._next.text.upper() == "FN" 4020 ): 4021 self._advance(2) 4022 fn_syntax = True 4023 4024 func = self._parse_function_call( 4025 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4026 ) 4027 4028 if fn_syntax: 4029 self._match(TokenType.R_BRACE) 4030 4031 return func 4032 4033 def _parse_function_call( 4034 self, 4035 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4036 anonymous: bool = False, 4037 optional_parens: bool = True, 4038 ) -> t.Optional[exp.Expression]: 4039 if not self._curr: 4040 return None 4041 4042 comments = self._curr.comments 4043 token_type = self._curr.token_type 4044 this = self._curr.text 4045 upper = this.upper() 4046 4047 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4048 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4049 self._advance() 4050 return parser(self) 4051 4052 if not self._next or self._next.token_type != TokenType.L_PAREN: 4053 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4054 self._advance() 4055 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4056 4057 return None 4058 4059 if token_type not in self.FUNC_TOKENS: 4060 return None 4061 4062 self._advance(2) 4063 4064 parser = self.FUNCTION_PARSERS.get(upper) 4065 if parser and not anonymous: 4066 this = parser(self) 4067 else: 4068 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4069 4070 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4071 this = self.expression(subquery_predicate, this=self._parse_select()) 4072 self._match_r_paren() 4073 return this 4074 4075 if functions is None: 4076 functions = self.FUNCTIONS 4077 4078 function = functions.get(upper) 4079 4080 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4081 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4082 4083 if function and not anonymous: 4084 if "dialect" in function.__code__.co_varnames: 4085 func = function(args, dialect=self.dialect) 4086 else: 4087 func = function(args) 4088 4089 func = self.validate_expression(func, args) 4090 if not self.dialect.NORMALIZE_FUNCTIONS: 4091 func.meta["name"] = this 4092 4093 this = func 4094 else: 4095 this = self.expression(exp.Anonymous, this=this, expressions=args) 4096 4097 if isinstance(this, exp.Expression): 4098 this.add_comments(comments) 4099 4100 self._match_r_paren(this) 4101 return self._parse_window(this) 4102 4103 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4104 return self._parse_column_def(self._parse_id_var()) 4105 4106 def _parse_user_defined_function( 4107 self, kind: t.Optional[TokenType] = None 4108 ) -> t.Optional[exp.Expression]: 4109 this = self._parse_id_var() 4110 4111 while self._match(TokenType.DOT): 4112 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4113 4114 if not 
self._match(TokenType.L_PAREN): 4115 return this 4116 4117 expressions = self._parse_csv(self._parse_function_parameter) 4118 self._match_r_paren() 4119 return self.expression( 4120 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4121 ) 4122 4123 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4124 literal = self._parse_primary() 4125 if literal: 4126 return self.expression(exp.Introducer, this=token.text, expression=literal) 4127 4128 return self.expression(exp.Identifier, this=token.text) 4129 4130 def _parse_session_parameter(self) -> exp.SessionParameter: 4131 kind = None 4132 this = self._parse_id_var() or self._parse_primary() 4133 4134 if this and self._match(TokenType.DOT): 4135 kind = this.name 4136 this = self._parse_var() or self._parse_primary() 4137 4138 return self.expression(exp.SessionParameter, this=this, kind=kind) 4139 4140 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4141 index = self._index 4142 4143 if self._match(TokenType.L_PAREN): 4144 expressions = t.cast( 4145 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4146 ) 4147 4148 if not self._match(TokenType.R_PAREN): 4149 self._retreat(index) 4150 else: 4151 expressions = [self._parse_id_var()] 4152 4153 if self._match_set(self.LAMBDAS): 4154 return self.LAMBDAS[self._prev.token_type](self, expressions) 4155 4156 self._retreat(index) 4157 4158 this: t.Optional[exp.Expression] 4159 4160 if self._match(TokenType.DISTINCT): 4161 this = self.expression( 4162 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4163 ) 4164 else: 4165 this = self._parse_select_or_expression(alias=alias) 4166 4167 return self._parse_limit( 4168 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4169 ) 4170 4171 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4172 index = self._index 4173 4174 if not self.errors: 4175 try: 4176 if self._parse_select(nested=True): 4177 return this 4178 except ParseError: 4179 pass 4180 finally: 4181 self.errors.clear() 4182 self._retreat(index) 4183 4184 if not self._match(TokenType.L_PAREN): 4185 return this 4186 4187 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4188 4189 self._match_r_paren() 4190 return self.expression(exp.Schema, this=this, expressions=args) 4191 4192 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4193 return self._parse_column_def(self._parse_field(any_token=True)) 4194 4195 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4196 # column defs are not really columns, they're identifiers 4197 if isinstance(this, exp.Column): 4198 this = this.this 4199 4200 kind = self._parse_types(schema=True) 4201 4202 if self._match_text_seq("FOR", "ORDINALITY"): 4203 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4204 4205 constraints: t.List[exp.Expression] = [] 4206 4207 if not kind and self._match(TokenType.ALIAS): 4208 constraints.append( 4209 self.expression( 4210 exp.ComputedColumnConstraint, 4211 this=self._parse_conjunction(), 4212 persisted=self._match_text_seq("PERSISTED"), 4213 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4214 ) 4215 ) 4216 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4217 self._match(TokenType.ALIAS) 4218 constraints.append( 4219 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4220 ) 4221 
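# The loop below greedily collects any remaining inline constraints. A sketch
# (hypothetical DDL; class names per sqlglot.exp):
#
#   >>> import sqlglot
#   >>> col = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 0)").find(sqlglot.exp.ColumnDef)
#   >>> [c.args["kind"].__class__.__name__ for c in col.args["constraints"]]
#   ['NotNullColumnConstraint', 'DefaultColumnConstraint']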
4222 while True: 4223 constraint = self._parse_column_constraint() 4224 if not constraint: 4225 break 4226 constraints.append(constraint) 4227 4228 if not kind and not constraints: 4229 return this 4230 4231 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4232 4233 def _parse_auto_increment( 4234 self, 4235 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4236 start = None 4237 increment = None 4238 4239 if self._match(TokenType.L_PAREN, advance=False): 4240 args = self._parse_wrapped_csv(self._parse_bitwise) 4241 start = seq_get(args, 0) 4242 increment = seq_get(args, 1) 4243 elif self._match_text_seq("START"): 4244 start = self._parse_bitwise() 4245 self._match_text_seq("INCREMENT") 4246 increment = self._parse_bitwise() 4247 4248 if start and increment: 4249 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4250 4251 return exp.AutoIncrementColumnConstraint() 4252 4253 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4254 if not self._match_text_seq("REFRESH"): 4255 self._retreat(self._index - 1) 4256 return None 4257 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4258 4259 def _parse_compress(self) -> exp.CompressColumnConstraint: 4260 if self._match(TokenType.L_PAREN, advance=False): 4261 return self.expression( 4262 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4263 ) 4264 4265 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4266 4267 def _parse_generated_as_identity( 4268 self, 4269 ) -> ( 4270 exp.GeneratedAsIdentityColumnConstraint 4271 | exp.ComputedColumnConstraint 4272 | exp.GeneratedAsRowColumnConstraint 4273 ): 4274 if self._match_text_seq("BY", "DEFAULT"): 4275 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4276 this = self.expression( 4277 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4278 ) 4279 else: 4280 self._match_text_seq("ALWAYS") 4281 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4282 4283 self._match(TokenType.ALIAS) 4284 4285 if self._match_text_seq("ROW"): 4286 start = self._match_text_seq("START") 4287 if not start: 4288 self._match(TokenType.END) 4289 hidden = self._match_text_seq("HIDDEN") 4290 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4291 4292 identity = self._match_text_seq("IDENTITY") 4293 4294 if self._match(TokenType.L_PAREN): 4295 if self._match(TokenType.START_WITH): 4296 this.set("start", self._parse_bitwise()) 4297 if self._match_text_seq("INCREMENT", "BY"): 4298 this.set("increment", self._parse_bitwise()) 4299 if self._match_text_seq("MINVALUE"): 4300 this.set("minvalue", self._parse_bitwise()) 4301 if self._match_text_seq("MAXVALUE"): 4302 this.set("maxvalue", self._parse_bitwise()) 4303 4304 if self._match_text_seq("CYCLE"): 4305 this.set("cycle", True) 4306 elif self._match_text_seq("NO", "CYCLE"): 4307 this.set("cycle", False) 4308 4309 if not identity: 4310 this.set("expression", self._parse_bitwise()) 4311 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4312 args = self._parse_csv(self._parse_bitwise) 4313 this.set("start", seq_get(args, 0)) 4314 this.set("increment", seq_get(args, 1)) 4315 4316 self._match_r_paren() 4317 4318 return this 4319 4320 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4321 self._match_text_seq("LENGTH") 4322 return 
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4323 4324 def _parse_not_constraint( 4325 self, 4326 ) -> t.Optional[exp.Expression]: 4327 if self._match_text_seq("NULL"): 4328 return self.expression(exp.NotNullColumnConstraint) 4329 if self._match_text_seq("CASESPECIFIC"): 4330 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4331 if self._match_text_seq("FOR", "REPLICATION"): 4332 return self.expression(exp.NotForReplicationColumnConstraint) 4333 return None 4334 4335 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4336 if self._match(TokenType.CONSTRAINT): 4337 this = self._parse_id_var() 4338 else: 4339 this = None 4340 4341 if self._match_texts(self.CONSTRAINT_PARSERS): 4342 return self.expression( 4343 exp.ColumnConstraint, 4344 this=this, 4345 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4346 ) 4347 4348 return this 4349 4350 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4351 if not self._match(TokenType.CONSTRAINT): 4352 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4353 4354 this = self._parse_id_var() 4355 expressions = [] 4356 4357 while True: 4358 constraint = self._parse_unnamed_constraint() or self._parse_function() 4359 if not constraint: 4360 break 4361 expressions.append(constraint) 4362 4363 return self.expression(exp.Constraint, this=this, expressions=expressions) 4364 4365 def _parse_unnamed_constraint( 4366 self, constraints: t.Optional[t.Collection[str]] = None 4367 ) -> t.Optional[exp.Expression]: 4368 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4369 constraints or self.CONSTRAINT_PARSERS 4370 ): 4371 return None 4372 4373 constraint = self._prev.text.upper() 4374 if constraint not in self.CONSTRAINT_PARSERS: 4375 self.raise_error(f"No parser found for schema constraint {constraint}.") 4376 4377 return self.CONSTRAINT_PARSERS[constraint](self) 4378 4379 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4380 self._match_text_seq("KEY") 4381 return self.expression( 4382 exp.UniqueColumnConstraint, 4383 this=self._parse_schema(self._parse_id_var(any_token=False)), 4384 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4385 ) 4386 4387 def _parse_key_constraint_options(self) -> t.List[str]: 4388 options = [] 4389 while True: 4390 if not self._curr: 4391 break 4392 4393 if self._match(TokenType.ON): 4394 action = None 4395 on = self._advance_any() and self._prev.text 4396 4397 if self._match_text_seq("NO", "ACTION"): 4398 action = "NO ACTION" 4399 elif self._match_text_seq("CASCADE"): 4400 action = "CASCADE" 4401 elif self._match_text_seq("RESTRICT"): 4402 action = "RESTRICT" 4403 elif self._match_pair(TokenType.SET, TokenType.NULL): 4404 action = "SET NULL" 4405 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4406 action = "SET DEFAULT" 4407 else: 4408 self.raise_error("Invalid key constraint") 4409 4410 options.append(f"ON {on} {action}") 4411 elif self._match_text_seq("NOT", "ENFORCED"): 4412 options.append("NOT ENFORCED") 4413 elif self._match_text_seq("DEFERRABLE"): 4414 options.append("DEFERRABLE") 4415 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4416 options.append("INITIALLY DEFERRED") 4417 elif self._match_text_seq("NORELY"): 4418 options.append("NORELY") 4419 elif self._match_text_seq("MATCH", "FULL"): 4420 options.append("MATCH FULL") 4421 else: 4422 break 4423 4424 return options 4425 4426 def _parse_references(self, match: bool = True) -> 
t.Optional[exp.Reference]: 4427 if match and not self._match(TokenType.REFERENCES): 4428 return None 4429 4430 expressions = None 4431 this = self._parse_table(schema=True) 4432 options = self._parse_key_constraint_options() 4433 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4434 4435 def _parse_foreign_key(self) -> exp.ForeignKey: 4436 expressions = self._parse_wrapped_id_vars() 4437 reference = self._parse_references() 4438 options = {} 4439 4440 while self._match(TokenType.ON): 4441 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4442 self.raise_error("Expected DELETE or UPDATE") 4443 4444 kind = self._prev.text.lower() 4445 4446 if self._match_text_seq("NO", "ACTION"): 4447 action = "NO ACTION" 4448 elif self._match(TokenType.SET): 4449 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4450 action = "SET " + self._prev.text.upper() 4451 else: 4452 self._advance() 4453 action = self._prev.text.upper() 4454 4455 options[kind] = action 4456 4457 return self.expression( 4458 exp.ForeignKey, 4459 expressions=expressions, 4460 reference=reference, 4461 **options, # type: ignore 4462 ) 4463 4464 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4465 return self._parse_field() 4466 4467 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4468 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4469 self._retreat(self._index - 1) 4470 return None 4471 4472 id_vars = self._parse_wrapped_id_vars() 4473 return self.expression( 4474 exp.PeriodForSystemTimeConstraint, 4475 this=seq_get(id_vars, 0), 4476 expression=seq_get(id_vars, 1), 4477 ) 4478 4479 def _parse_primary_key( 4480 self, wrapped_optional: bool = False, in_props: bool = False 4481 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4482 desc = ( 4483 self._match_set((TokenType.ASC, TokenType.DESC)) 4484 and self._prev.token_type == TokenType.DESC 4485 ) 4486 4487 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4488 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4489 4490 expressions = self._parse_wrapped_csv( 4491 self._parse_primary_key_part, optional=wrapped_optional 4492 ) 4493 options = self._parse_key_constraint_options() 4494 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4495 4496 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4497 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4498 4499 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4500 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4501 return this 4502 4503 bracket_kind = self._prev.token_type 4504 expressions = self._parse_csv( 4505 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4506 ) 4507 4508 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4509 self.raise_error("Expected ]") 4510 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4511 self.raise_error("Expected }") 4512 4513 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4514 if bracket_kind == TokenType.L_BRACE: 4515 this = self.expression(exp.Struct, expressions=expressions) 4516 elif not this or this.name.upper() == "ARRAY": 4517 this = self.expression(exp.Array, expressions=expressions) 4518 else: 4519 expressions = apply_index_offset(this, expressions, 
-self.dialect.INDEX_OFFSET) 4520 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4521 4522 self._add_comments(this) 4523 return self._parse_bracket(this) 4524 4525 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4526 if self._match(TokenType.COLON): 4527 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4528 return this 4529 4530 def _parse_case(self) -> t.Optional[exp.Expression]: 4531 ifs = [] 4532 default = None 4533 4534 comments = self._prev_comments 4535 expression = self._parse_conjunction() 4536 4537 while self._match(TokenType.WHEN): 4538 this = self._parse_conjunction() 4539 self._match(TokenType.THEN) 4540 then = self._parse_conjunction() 4541 ifs.append(self.expression(exp.If, this=this, true=then)) 4542 4543 if self._match(TokenType.ELSE): 4544 default = self._parse_conjunction() 4545 4546 if not self._match(TokenType.END): 4547 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4548 default = exp.column("interval") 4549 else: 4550 self.raise_error("Expected END after CASE", self._prev) 4551 4552 return self._parse_window( 4553 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4554 ) 4555 4556 def _parse_if(self) -> t.Optional[exp.Expression]: 4557 if self._match(TokenType.L_PAREN): 4558 args = self._parse_csv(self._parse_conjunction) 4559 this = self.validate_expression(exp.If.from_arg_list(args), args) 4560 self._match_r_paren() 4561 else: 4562 index = self._index - 1 4563 4564 if self.NO_PAREN_IF_COMMANDS and index == 0: 4565 return self._parse_as_command(self._prev) 4566 4567 condition = self._parse_conjunction() 4568 4569 if not condition: 4570 self._retreat(index) 4571 return None 4572 4573 self._match(TokenType.THEN) 4574 true = self._parse_conjunction() 4575 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4576 self._match(TokenType.END) 4577 this = self.expression(exp.If, this=condition, true=true, false=false) 4578 4579 return self._parse_window(this) 4580 4581 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4582 if not self._match_text_seq("VALUE", "FOR"): 4583 self._retreat(self._index - 1) 4584 return None 4585 4586 return self.expression( 4587 exp.NextValueFor, 4588 this=self._parse_column(), 4589 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4590 ) 4591 4592 def _parse_extract(self) -> exp.Extract: 4593 this = self._parse_function() or self._parse_var() or self._parse_type() 4594 4595 if self._match(TokenType.FROM): 4596 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4597 4598 if not self._match(TokenType.COMMA): 4599 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4600 4601 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4602 4603 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4604 this = self._parse_conjunction() 4605 4606 if not self._match(TokenType.ALIAS): 4607 if self._match(TokenType.COMMA): 4608 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4609 4610 self.raise_error("Expected AS after CAST") 4611 4612 fmt = None 4613 to = self._parse_types() 4614 4615 if self._match(TokenType.FORMAT): 4616 fmt_string = self._parse_string() 4617 fmt = self._parse_at_time_zone(fmt_string) 4618 4619 if not to: 4620 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4621 if to.this in 
exp.DataType.TEMPORAL_TYPES: 4622 this = self.expression( 4623 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4624 this=this, 4625 format=exp.Literal.string( 4626 format_time( 4627 fmt_string.this if fmt_string else "", 4628 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4629 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4630 ) 4631 ), 4632 ) 4633 4634 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4635 this.set("zone", fmt.args["zone"]) 4636 return this 4637 elif not to: 4638 self.raise_error("Expected TYPE after CAST") 4639 elif isinstance(to, exp.Identifier): 4640 to = exp.DataType.build(to.name, udt=True) 4641 elif to.this == exp.DataType.Type.CHAR: 4642 if self._match(TokenType.CHARACTER_SET): 4643 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4644 4645 return self.expression( 4646 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4647 ) 4648 4649 def _parse_string_agg(self) -> exp.Expression: 4650 if self._match(TokenType.DISTINCT): 4651 args: t.List[t.Optional[exp.Expression]] = [ 4652 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4653 ] 4654 if self._match(TokenType.COMMA): 4655 args.extend(self._parse_csv(self._parse_conjunction)) 4656 else: 4657 args = self._parse_csv(self._parse_conjunction) # type: ignore 4658 4659 index = self._index 4660 if not self._match(TokenType.R_PAREN) and args: 4661 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4662 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4663 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4664 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4665 4666 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4667 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4668 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4669 if not self._match_text_seq("WITHIN", "GROUP"): 4670 self._retreat(index) 4671 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4672 4673 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4674 order = self._parse_order(this=seq_get(args, 0)) 4675 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4676 4677 def _parse_convert( 4678 self, strict: bool, safe: t.Optional[bool] = None 4679 ) -> t.Optional[exp.Expression]: 4680 this = self._parse_bitwise() 4681 4682 if self._match(TokenType.USING): 4683 to: t.Optional[exp.Expression] = self.expression( 4684 exp.CharacterSet, this=self._parse_var() 4685 ) 4686 elif self._match(TokenType.COMMA): 4687 to = self._parse_types() 4688 else: 4689 to = None 4690 4691 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4692 4693 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4694 """ 4695 There are generally two variants of the DECODE function: 4696 4697 - DECODE(bin, charset) 4698 - DECODE(expression, search, result [, search, result] ... [, default]) 4699 4700 The second variant will always be parsed into a CASE expression. 
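For example, DECODE(x, 1, 'one', 'other') is built (roughly) as
CASE WHEN x = 1 THEN 'one' ELSE 'other' END.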
Note that NULL 4701 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4702 instead of relying on pattern matching. 4703 """ 4704 args = self._parse_csv(self._parse_conjunction) 4705 4706 if len(args) < 3: 4707 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4708 4709 expression, *expressions = args 4710 if not expression: 4711 return None 4712 4713 ifs = [] 4714 for search, result in zip(expressions[::2], expressions[1::2]): 4715 if not search or not result: 4716 return None 4717 4718 if isinstance(search, exp.Literal): 4719 ifs.append( 4720 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4721 ) 4722 elif isinstance(search, exp.Null): 4723 ifs.append( 4724 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4725 ) 4726 else: 4727 cond = exp.or_( 4728 exp.EQ(this=expression.copy(), expression=search), 4729 exp.and_( 4730 exp.Is(this=expression.copy(), expression=exp.Null()), 4731 exp.Is(this=search.copy(), expression=exp.Null()), 4732 copy=False, 4733 ), 4734 copy=False, 4735 ) 4736 ifs.append(exp.If(this=cond, true=result)) 4737 4738 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4739 4740 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4741 self._match_text_seq("KEY") 4742 key = self._parse_column() 4743 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4744 self._match_text_seq("VALUE") 4745 value = self._parse_bitwise() 4746 4747 if not key and not value: 4748 return None 4749 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4750 4751 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4752 if not this or not self._match_text_seq("FORMAT", "JSON"): 4753 return this 4754 4755 return self.expression(exp.FormatJson, this=this) 4756 4757 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4758 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4759 for value in values: 4760 if self._match_text_seq(value, "ON", on): 4761 return f"{value} ON {on}" 4762 4763 return None 4764 4765 @t.overload 4766 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: 4767 ... 4768 4769 @t.overload 4770 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: 4771 ... 
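# A sketch of what this method produces (class names per sqlglot.exp): for
# JSON_OBJECT('a' VALUE 1 WITH UNIQUE KEYS), expressions holds a single
# exp.JSONKeyValue and unique_keys is True; with agg=True the same shape is
# built as exp.JSONObjectAgg instead (for JSON_OBJECTAGG calls).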
4772 4773 def _parse_json_object(self, agg=False): 4774 star = self._parse_star() 4775 expressions = ( 4776 [star] 4777 if star 4778 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4779 ) 4780 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4781 4782 unique_keys = None 4783 if self._match_text_seq("WITH", "UNIQUE"): 4784 unique_keys = True 4785 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4786 unique_keys = False 4787 4788 self._match_text_seq("KEYS") 4789 4790 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4791 self._parse_type() 4792 ) 4793 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4794 4795 return self.expression( 4796 exp.JSONObjectAgg if agg else exp.JSONObject, 4797 expressions=expressions, 4798 null_handling=null_handling, 4799 unique_keys=unique_keys, 4800 return_type=return_type, 4801 encoding=encoding, 4802 ) 4803 4804 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4805 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4806 if not self._match_text_seq("NESTED"): 4807 this = self._parse_id_var() 4808 kind = self._parse_types(allow_identifiers=False) 4809 nested = None 4810 else: 4811 this = None 4812 kind = None 4813 nested = True 4814 4815 path = self._match_text_seq("PATH") and self._parse_string() 4816 nested_schema = nested and self._parse_json_schema() 4817 4818 return self.expression( 4819 exp.JSONColumnDef, 4820 this=this, 4821 kind=kind, 4822 path=path, 4823 nested_schema=nested_schema, 4824 ) 4825 4826 def _parse_json_schema(self) -> exp.JSONSchema: 4827 self._match_text_seq("COLUMNS") 4828 return self.expression( 4829 exp.JSONSchema, 4830 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4831 ) 4832 4833 def _parse_json_table(self) -> exp.JSONTable: 4834 this = self._parse_format_json(self._parse_bitwise()) 4835 path = self._match(TokenType.COMMA) and self._parse_string() 4836 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4837 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4838 schema = self._parse_json_schema() 4839 4840 return exp.JSONTable( 4841 this=this, 4842 schema=schema, 4843 path=path, 4844 error_handling=error_handling, 4845 empty_handling=empty_handling, 4846 ) 4847 4848 def _parse_match_against(self) -> exp.MatchAgainst: 4849 expressions = self._parse_csv(self._parse_column) 4850 4851 self._match_text_seq(")", "AGAINST", "(") 4852 4853 this = self._parse_string() 4854 4855 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4856 modifier = "IN NATURAL LANGUAGE MODE" 4857 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4858 modifier = f"{modifier} WITH QUERY EXPANSION" 4859 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4860 modifier = "IN BOOLEAN MODE" 4861 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4862 modifier = "WITH QUERY EXPANSION" 4863 else: 4864 modifier = None 4865 4866 return self.expression( 4867 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4868 ) 4869 4870 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4871 def _parse_open_json(self) -> exp.OpenJSON: 4872 this = self._parse_bitwise() 4873 path = self._match(TokenType.COMMA) and self._parse_string() 4874 4875 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4876 this = self._parse_field(any_token=True) 4877 kind = self._parse_types() 4878 path = 
self._parse_string() 4879 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4880 4881 return self.expression( 4882 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4883 ) 4884 4885 expressions = None 4886 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4887 self._match_l_paren() 4888 expressions = self._parse_csv(_parse_open_json_column_def) 4889 4890 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4891 4892 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4893 args = self._parse_csv(self._parse_bitwise) 4894 4895 if self._match(TokenType.IN): 4896 return self.expression( 4897 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4898 ) 4899 4900 if haystack_first: 4901 haystack = seq_get(args, 0) 4902 needle = seq_get(args, 1) 4903 else: 4904 needle = seq_get(args, 0) 4905 haystack = seq_get(args, 1) 4906 4907 return self.expression( 4908 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4909 ) 4910 4911 def _parse_predict(self) -> exp.Predict: 4912 self._match_text_seq("MODEL") 4913 this = self._parse_table() 4914 4915 self._match(TokenType.COMMA) 4916 self._match_text_seq("TABLE") 4917 4918 return self.expression( 4919 exp.Predict, 4920 this=this, 4921 expression=self._parse_table(), 4922 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4923 ) 4924 4925 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4926 args = self._parse_csv(self._parse_table) 4927 return exp.JoinHint(this=func_name.upper(), expressions=args) 4928 4929 def _parse_substring(self) -> exp.Substring: 4930 # Postgres supports the form: substring(string [from int] [for int]) 4931 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4932 4933 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4934 4935 if self._match(TokenType.FROM): 4936 args.append(self._parse_bitwise()) 4937 if self._match(TokenType.FOR): 4938 args.append(self._parse_bitwise()) 4939 4940 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4941 4942 def _parse_trim(self) -> exp.Trim: 4943 # https://www.w3resource.com/sql/character-functions/trim.php 4944 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4945 4946 position = None 4947 collation = None 4948 expression = None 4949 4950 if self._match_texts(self.TRIM_TYPES): 4951 position = self._prev.text.upper() 4952 4953 this = self._parse_bitwise() 4954 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4955 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4956 expression = self._parse_bitwise() 4957 4958 if invert_order: 4959 this, expression = expression, this 4960 4961 if self._match(TokenType.COLLATE): 4962 collation = self._parse_bitwise() 4963 4964 return self.expression( 4965 exp.Trim, this=this, position=position, expression=expression, collation=collation 4966 ) 4967 4968 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4969 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4970 4971 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4972 return self._parse_window(self._parse_id_var(), alias=True) 4973 4974 def _parse_respect_or_ignore_nulls( 4975 self, this: t.Optional[exp.Expression] 4976 ) -> t.Optional[exp.Expression]: 4977 if self._match_text_seq("IGNORE", "NULLS"): 4978 return self.expression(exp.IgnoreNulls, this=this) 4979 if 
self._match_text_seq("RESPECT", "NULLS"): 4980 return self.expression(exp.RespectNulls, this=this) 4981 return this 4982 4983 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4984 if self._match(TokenType.HAVING): 4985 self._match_texts(("MAX", "MIN")) 4986 max = self._prev.text.upper() != "MIN" 4987 return self.expression( 4988 exp.HavingMax, this=this, expression=self._parse_column(), max=max 4989 ) 4990 4991 return this 4992 4993 def _parse_window( 4994 self, this: t.Optional[exp.Expression], alias: bool = False 4995 ) -> t.Optional[exp.Expression]: 4996 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4997 self._match(TokenType.WHERE) 4998 this = self.expression( 4999 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5000 ) 5001 self._match_r_paren() 5002 5003 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5004 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5005 if self._match_text_seq("WITHIN", "GROUP"): 5006 order = self._parse_wrapped(self._parse_order) 5007 this = self.expression(exp.WithinGroup, this=this, expression=order) 5008 5009 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5010 # Some dialects choose to implement and some do not. 5011 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5012 5013 # There is some code above in _parse_lambda that handles 5014 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5015 5016 # The below changes handle 5017 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5018 5019 # Oracle allows both formats 5020 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5021 # and Snowflake chose to do the same for familiarity 5022 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5023 if isinstance(this, exp.AggFunc): 5024 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5025 5026 if ignore_respect and ignore_respect is not this: 5027 ignore_respect.replace(ignore_respect.this) 5028 this = self.expression(ignore_respect.__class__, this=this) 5029 5030 this = self._parse_respect_or_ignore_nulls(this) 5031 5032 # bigquery select from window x AS (partition by ...) 
5033 if alias: 5034 over = None 5035 self._match(TokenType.ALIAS) 5036 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5037 return this 5038 else: 5039 over = self._prev.text.upper() 5040 5041 if not self._match(TokenType.L_PAREN): 5042 return self.expression( 5043 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5044 ) 5045 5046 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5047 5048 first = self._match(TokenType.FIRST) 5049 if self._match_text_seq("LAST"): 5050 first = False 5051 5052 partition, order = self._parse_partition_and_order() 5053 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5054 5055 if kind: 5056 self._match(TokenType.BETWEEN) 5057 start = self._parse_window_spec() 5058 self._match(TokenType.AND) 5059 end = self._parse_window_spec() 5060 5061 spec = self.expression( 5062 exp.WindowSpec, 5063 kind=kind, 5064 start=start["value"], 5065 start_side=start["side"], 5066 end=end["value"], 5067 end_side=end["side"], 5068 ) 5069 else: 5070 spec = None 5071 5072 self._match_r_paren() 5073 5074 window = self.expression( 5075 exp.Window, 5076 this=this, 5077 partition_by=partition, 5078 order=order, 5079 spec=spec, 5080 alias=window_alias, 5081 over=over, 5082 first=first, 5083 ) 5084 5085 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5086 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5087 return self._parse_window(window, alias=alias) 5088 5089 return window 5090 5091 def _parse_partition_and_order( 5092 self, 5093 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5094 return self._parse_partition_by(), self._parse_order() 5095 5096 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5097 self._match(TokenType.BETWEEN) 5098 5099 return { 5100 "value": ( 5101 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5102 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5103 or self._parse_bitwise() 5104 ), 5105 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5106 } 5107 5108 def _parse_alias( 5109 self, this: t.Optional[exp.Expression], explicit: bool = False 5110 ) -> t.Optional[exp.Expression]: 5111 any_token = self._match(TokenType.ALIAS) 5112 comments = self._prev_comments 5113 5114 if explicit and not any_token: 5115 return this 5116 5117 if self._match(TokenType.L_PAREN): 5118 aliases = self.expression( 5119 exp.Aliases, 5120 comments=comments, 5121 this=this, 5122 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5123 ) 5124 self._match_r_paren(aliases) 5125 return aliases 5126 5127 alias = self._parse_id_var(any_token) or ( 5128 self.STRING_ALIASES and self._parse_string_as_identifier() 5129 ) 5130 5131 if alias: 5132 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5133 column = this.this 5134 5135 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5136 if not this.comments and column and column.comments: 5137 this.comments = column.comments 5138 column.comments = None 5139 5140 return this 5141 5142 def _parse_id_var( 5143 self, 5144 any_token: bool = True, 5145 tokens: t.Optional[t.Collection[TokenType]] = None, 5146 ) -> t.Optional[exp.Expression]: 5147 identifier = self._parse_identifier() 5148 5149 if identifier: 5150 return identifier 5151 5152 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5153 quoted = self._prev.token_type == TokenType.STRING 5154 
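# A string token here means the name was supplied quoted, so that flag is
# preserved on the resulting Identifier.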
return exp.Identifier(this=self._prev.text, quoted=quoted) 5155 5156 return None 5157 5158 def _parse_string(self) -> t.Optional[exp.Expression]: 5159 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5160 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5161 return self._parse_placeholder() 5162 5163 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5164 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5165 5166 def _parse_number(self) -> t.Optional[exp.Expression]: 5167 if self._match(TokenType.NUMBER): 5168 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5169 return self._parse_placeholder() 5170 5171 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5172 if self._match(TokenType.IDENTIFIER): 5173 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5174 return self._parse_placeholder() 5175 5176 def _parse_var( 5177 self, 5178 any_token: bool = False, 5179 tokens: t.Optional[t.Collection[TokenType]] = None, 5180 upper: bool = False, 5181 ) -> t.Optional[exp.Expression]: 5182 if ( 5183 (any_token and self._advance_any()) 5184 or self._match(TokenType.VAR) 5185 or (self._match_set(tokens) if tokens else False) 5186 ): 5187 return self.expression( 5188 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5189 ) 5190 return self._parse_placeholder() 5191 5192 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5193 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5194 self._advance() 5195 return self._prev 5196 return None 5197 5198 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5199 return self._parse_var() or self._parse_string() 5200 5201 def _parse_null(self) -> t.Optional[exp.Expression]: 5202 if self._match_set(self.NULL_TOKENS): 5203 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5204 return self._parse_placeholder() 5205 5206 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5207 if self._match(TokenType.TRUE): 5208 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5209 if self._match(TokenType.FALSE): 5210 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5211 return self._parse_placeholder() 5212 5213 def _parse_star(self) -> t.Optional[exp.Expression]: 5214 if self._match(TokenType.STAR): 5215 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5216 return self._parse_placeholder() 5217 5218 def _parse_parameter(self) -> exp.Parameter: 5219 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5220 return ( 5221 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5222 ) 5223 5224 self._match(TokenType.L_BRACE) 5225 this = _parse_parameter_part() 5226 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5227 self._match(TokenType.R_BRACE) 5228 5229 return self.expression(exp.Parameter, this=this, expression=expression) 5230 5231 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5232 if self._match_set(self.PLACEHOLDER_PARSERS): 5233 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5234 if placeholder: 5235 return placeholder 5236 self._advance(-1) 5237 return None 5238 5239 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5240 if not self._match(TokenType.EXCEPT): 5241 return None 5242 if self._match(TokenType.L_PAREN, advance=False): 5243 return self._parse_wrapped_csv(self._parse_column) 5244 5245 
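# Bare single-column form, e.g. "EXCEPT col"; the parenthesized list form
# "EXCEPT (a, b)" is handled by the wrapped branch above.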
    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)
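Editor's note: `_parse_csv`, `_parse_tokens`, and `_parse_wrapped` are the three combinators most of this module is built from: separator-delimited lists, left-associative binary chains, and parenthesized groups. A standalone sketch of the `_parse_tokens` pattern (hypothetical names, not sqlglot internals) shows the left fold it performs:

# "1 + 2 + 3" folds left into Add(Add(1, 2), 3), mirroring
# `while self._match_set(...)` in _parse_tokens above.
from dataclasses import dataclass

@dataclass
class Add:
    this: object
    expression: object

def parse_term(tokens):
    return int(tokens.pop(0))

def parse_sum(tokens):
    this = parse_term(tokens)
    while tokens and tokens[0] == "+":
        tokens.pop(0)
        this = Add(this=this, expression=parse_term(tokens))
    return this

assert parse_sum(["1", "+", "2", "+", "3"]) == Add(Add(1, 2), 3)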
self._match_text_seq("NO") 5351 self._match_text_seq("CHAIN") 5352 5353 if is_rollback: 5354 return self.expression(exp.Rollback, savepoint=savepoint) 5355 5356 return self.expression(exp.Commit, chain=chain) 5357 5358 def _parse_refresh(self) -> exp.Refresh: 5359 self._match(TokenType.TABLE) 5360 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5361 5362 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5363 if not self._match_text_seq("ADD"): 5364 return None 5365 5366 self._match(TokenType.COLUMN) 5367 exists_column = self._parse_exists(not_=True) 5368 expression = self._parse_field_def() 5369 5370 if expression: 5371 expression.set("exists", exists_column) 5372 5373 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5374 if self._match_texts(("FIRST", "AFTER")): 5375 position = self._prev.text 5376 column_position = self.expression( 5377 exp.ColumnPosition, this=self._parse_column(), position=position 5378 ) 5379 expression.set("position", column_position) 5380 5381 return expression 5382 5383 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5384 drop = self._match(TokenType.DROP) and self._parse_drop() 5385 if drop and not isinstance(drop, exp.Command): 5386 drop.set("kind", drop.args.get("kind", "COLUMN")) 5387 return drop 5388 5389 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5390 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5391 return self.expression( 5392 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5393 ) 5394 5395 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5396 index = self._index - 1 5397 5398 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5399 return self._parse_csv( 5400 lambda: self.expression( 5401 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5402 ) 5403 ) 5404 5405 self._retreat(index) 5406 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5407 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5408 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5409 5410 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5411 self._match(TokenType.COLUMN) 5412 column = self._parse_field(any_token=True) 5413 5414 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5415 return self.expression(exp.AlterColumn, this=column, drop=True) 5416 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5417 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5418 if self._match(TokenType.COMMENT): 5419 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5420 5421 self._match_text_seq("SET", "DATA") 5422 return self.expression( 5423 exp.AlterColumn, 5424 this=column, 5425 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5426 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5427 using=self._match(TokenType.USING) and self._parse_conjunction(), 5428 ) 5429 5430 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5431 index = self._index - 1 5432 5433 partition_exists = self._parse_exists() 5434 if self._match(TokenType.PARTITION, advance=False): 5435 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5436 5437 self._retreat(index) 5438 return self._parse_csv(self._parse_drop_column) 5439 5440 def 
_parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5441 if self._match(TokenType.COLUMN): 5442 exists = self._parse_exists() 5443 old_column = self._parse_column() 5444 to = self._match_text_seq("TO") 5445 new_column = self._parse_column() 5446 5447 if old_column is None or to is None or new_column is None: 5448 return None 5449 5450 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5451 5452 self._match_text_seq("TO") 5453 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5454 5455 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5456 start = self._prev 5457 5458 if not self._match(TokenType.TABLE): 5459 return self._parse_as_command(start) 5460 5461 exists = self._parse_exists() 5462 only = self._match_text_seq("ONLY") 5463 this = self._parse_table(schema=True) 5464 5465 if self._next: 5466 self._advance() 5467 5468 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5469 if parser: 5470 actions = ensure_list(parser(self)) 5471 5472 if not self._curr and actions: 5473 return self.expression( 5474 exp.AlterTable, 5475 this=this, 5476 exists=exists, 5477 actions=actions, 5478 only=only, 5479 ) 5480 5481 return self._parse_as_command(start) 5482 5483 def _parse_merge(self) -> exp.Merge: 5484 self._match(TokenType.INTO) 5485 target = self._parse_table() 5486 5487 if target and self._match(TokenType.ALIAS, advance=False): 5488 target.set("alias", self._parse_table_alias()) 5489 5490 self._match(TokenType.USING) 5491 using = self._parse_table() 5492 5493 self._match(TokenType.ON) 5494 on = self._parse_conjunction() 5495 5496 return self.expression( 5497 exp.Merge, 5498 this=target, 5499 using=using, 5500 on=on, 5501 expressions=self._parse_when_matched(), 5502 ) 5503 5504 def _parse_when_matched(self) -> t.List[exp.When]: 5505 whens = [] 5506 5507 while self._match(TokenType.WHEN): 5508 matched = not self._match(TokenType.NOT) 5509 self._match_text_seq("MATCHED") 5510 source = ( 5511 False 5512 if self._match_text_seq("BY", "TARGET") 5513 else self._match_text_seq("BY", "SOURCE") 5514 ) 5515 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5516 5517 self._match(TokenType.THEN) 5518 5519 if self._match(TokenType.INSERT): 5520 _this = self._parse_star() 5521 if _this: 5522 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5523 else: 5524 then = self.expression( 5525 exp.Insert, 5526 this=self._parse_value(), 5527 expression=self._match_text_seq("VALUES") and self._parse_value(), 5528 ) 5529 elif self._match(TokenType.UPDATE): 5530 expressions = self._parse_star() 5531 if expressions: 5532 then = self.expression(exp.Update, expressions=expressions) 5533 else: 5534 then = self.expression( 5535 exp.Update, 5536 expressions=self._match(TokenType.SET) 5537 and self._parse_csv(self._parse_equality), 5538 ) 5539 elif self._match(TokenType.DELETE): 5540 then = self.expression(exp.Var, this=self._prev.text) 5541 else: 5542 then = None 5543 5544 whens.append( 5545 self.expression( 5546 exp.When, 5547 matched=matched, 5548 source=source, 5549 condition=condition, 5550 then=then, 5551 ) 5552 ) 5553 return whens 5554 5555 def _parse_show(self) -> t.Optional[exp.Expression]: 5556 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5557 if parser: 5558 return parser(self) 5559 return self._parse_as_command(self._prev) 5560 5561 def _parse_set_item_assignment( 5562 self, kind: t.Optional[str] = None 5563 ) -> 
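Editor's note: the MERGE/WHEN MATCHED machinery above is most easily exercised through the public API. A usage sketch:

import sqlglot
from sqlglot import exp

merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)
# _parse_when_matched produced one exp.When per WHEN clause.
assert all(isinstance(w, exp.When) for w in merge.expressions)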
    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])
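Editor's note: `_parse_show` and `_parse_set` both fall back to `_parse_as_command`, which swallows the remaining tokens into an opaque exp.Command that round-trips the raw SQL. A usage sketch, assuming the default dialect (where SHOW_PARSERS is empty, so SHOW takes the fallback path and a warning is logged):

import sqlglot
from sqlglot import exp

stmt = sqlglot.parse_one("SHOW TABLES")
# The statement is preserved verbatim rather than modeled structurally.
assert isinstance(stmt, exp.Command)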
    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
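Editor's note: `_find_parser` greedily walks multi-word keywords through a prefix trie that the `_Parser` metaclass builds from SHOW_PARSERS and SET_PARSERS. A sketch using the real trie helpers imported at the top of this module (the keyword table below is made up for illustration):

from sqlglot.trie import TrieResult, in_trie, new_trie

trie = new_trie(key.split(" ") for key in ("ISOLATION LEVEL", "ISOLATION MODE"))

result, _ = in_trie(trie, ["ISOLATION"])
assert result == TrieResult.PREFIX   # valid prefix, keep consuming tokens

result, _ = in_trie(trie, ["ISOLATION", "LEVEL"])
assert result == TrieResult.EXISTS   # full key found, dispatch its parser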
        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)

        return node
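Editor's note: that is the end of the module source. A usage sketch tying it together, driving this module's classes directly rather than through the top-level sqlglot helpers:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

tokens = Tokenizer().tokenize("SELECT a FROM t")
# parse returns one syntax tree per statement in the token stream.
(tree,) = Parser().parse(tokens)
assert tree.sql() == "SELECT a FROM t"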
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }
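Editor's note: FUNCTIONS maps function names to builder callables that receive the parsed argument list (and, for some entries, the dialect) and return an expression node. A sketch of registering a custom builder by subclassing; the FOO name and its mapping to exp.Anonymous are made up for illustration:

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    FUNCTIONS = {
        **Parser.FUNCTIONS,
        # A builder takes the parsed argument list and returns a node.
        "FOO": lambda args: exp.Anonymous(this="FOO", expressions=args),
    }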
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }
    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
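Editor's note: the CONJUNCTION, EQUALITY, COMPARISON, BITWISE, TERM, FACTOR, and EXPONENT tables above drive a precedence-climbing cascade of `_parse_*` methods, so tables further down the cascade bind tighter. A usage sketch showing the resulting grouping:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("SELECT * FROM t WHERE a OR b AND c")
where = tree.find(exp.Where)
# AND binds tighter than OR: Or(a, And(b, c)).
assert isinstance(where.this, exp.Or)
assert isinstance(where.this.expression, exp.And)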
    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
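Editor's note: EXPRESSION_PARSERS is the dispatch table behind parse_into (defined further down), which is exposed publicly through parse_one's `into` argument. A usage sketch:

import sqlglot
from sqlglot import exp

# exp.Condition routes through _parse_conjunction, yielding an EQ node here.
cond = sqlglot.parse_one("x = 1", into=exp.Condition)
assert isinstance(cond, exp.EQ)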
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }
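Editor's note: PRIMARY_PARSERS above turns literal tokens into leaf nodes of the tree. A usage sketch:

import sqlglot
from sqlglot import exp

select = sqlglot.parse_one("SELECT 'a', 1, TRUE")
# String and number literals become exp.Literal; TRUE becomes exp.Boolean.
assert [type(e) for e in select.expressions] == [exp.Literal, exp.Literal, exp.Boolean]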
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
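Editor's note: PROPERTY_PARSERS handles the trailing property clauses on DDL statements. A usage sketch, assuming the default dialect accepts a trailing COMMENT property as written here:

import sqlglot
from sqlglot import exp

create = sqlglot.parse_one("CREATE TABLE t (x INT) COMMENT 'demo'")
props = create.args.get("properties")
# The "COMMENT" entry above built a SchemaCommentProperty node.
assert any(isinstance(p, exp.SchemaCommentProperty) for p in props.expressions)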
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
"TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 897 } 898 899 QUERY_MODIFIER_PARSERS = { 900 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 901 TokenType.WHERE: lambda self: ("where", self._parse_where()), 902 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 903 TokenType.HAVING: lambda self: ("having", self._parse_having()), 904 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 905 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 906 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 907 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 908 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 909 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 910 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 911 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 912 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 913 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 914 TokenType.CLUSTER_BY: lambda self: ( 915 "cluster", 916 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 917 ), 918 TokenType.DISTRIBUTE_BY: lambda self: ( 919 "distribute", 920 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 921 ), 922 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 923 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 924 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 925 } 926 927 SET_PARSERS = { 928 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 929 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 930 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 931 "TRANSACTION": lambda self: self._parse_set_transaction(), 932 } 933 934 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 935 936 TYPE_LITERAL_PARSERS = { 937 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 938 } 939 940 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 941 942 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 943 944 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 945 946 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 947 TRANSACTION_CHARACTERISTICS = { 948 "ISOLATION LEVEL REPEATABLE READ", 949 "ISOLATION LEVEL READ COMMITTED", 950 "ISOLATION LEVEL READ UNCOMMITTED", 951 "ISOLATION LEVEL SERIALIZABLE", 952 "READ WRITE", 953 "READ ONLY", 954 } 955 956 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 957 958 CLONE_KEYWORDS = {"CLONE", "COPY"} 959 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 960 961 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 962 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 963 964 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 965 966 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 967 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 968 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 969 970 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 971 972 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 973 974 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 975 976 DISTINCT_TOKENS = 
{TokenType.DISTINCT} 977 978 NULL_TOKENS = {TokenType.NULL} 979 980 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 981 982 STRICT_CAST = True 983 984 PREFIXED_PIVOT_COLUMNS = False 985 IDENTIFY_PIVOT_STRINGS = False 986 987 LOG_DEFAULTS_TO_LN = False 988 989 # Whether ADD is present for each column added by ALTER TABLE 990 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 991 992 # Whether the table sample clause expects CSV syntax 993 TABLESAMPLE_CSV = False 994 995 # Whether the SET command needs a delimiter (e.g. "=") for assignments 996 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 997 998 # Whether the TRIM function expects the characters to trim as its first argument 999 TRIM_PATTERN_FIRST = False 1000 1001 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1002 STRING_ALIASES = False 1003 1004 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1005 MODIFIERS_ATTACHED_TO_UNION = True 1006 UNION_MODIFIERS = {"order", "limit", "offset"} 1007 1008 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1009 NO_PAREN_IF_COMMANDS = True 1010 1011 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1012 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1013 1014 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1015 # If this is True and '(' is not found, the keyword will be treated as an identifier 1016 VALUES_FOLLOWED_BY_PAREN = True 1017 1018 __slots__ = ( 1019 "error_level", 1020 "error_message_context", 1021 "max_errors", 1022 "dialect", 1023 "sql", 1024 "errors", 1025 "_tokens", 1026 "_index", 1027 "_curr", 1028 "_next", 1029 "_prev", 1030 "_prev_comments", 1031 ) 1032 1033 # Autofilled 1034 SHOW_TRIE: t.Dict = {} 1035 SET_TRIE: t.Dict = {} 1036 1037 def __init__( 1038 self, 1039 error_level: t.Optional[ErrorLevel] = None, 1040 error_message_context: int = 100, 1041 max_errors: int = 3, 1042 dialect: DialectType = None, 1043 ): 1044 from sqlglot.dialects import Dialect 1045 1046 self.error_level = error_level or ErrorLevel.IMMEDIATE 1047 self.error_message_context = error_message_context 1048 self.max_errors = max_errors 1049 self.dialect = Dialect.get_or_raise(dialect) 1050 self.reset() 1051 1052 def reset(self): 1053 self.sql = "" 1054 self.errors = [] 1055 self._tokens = [] 1056 self._index = 0 1057 self._curr = None 1058 self._next = None 1059 self._prev = None 1060 self._prev_comments = None 1061 1062 def parse( 1063 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1064 ) -> t.List[t.Optional[exp.Expression]]: 1065 """ 1066 Parses a list of tokens and returns a list of syntax trees, one tree 1067 per parsed SQL statement. 1068 1069 Args: 1070 raw_tokens: The list of tokens. 1071 sql: The original SQL string, used to produce helpful debug messages. 1072 1073 Returns: 1074 The list of the produced syntax trees. 1075 """ 1076 return self._parse( 1077 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1078 ) 1079 1080 def parse_into( 1081 self, 1082 expression_types: exp.IntoType, 1083 raw_tokens: t.List[Token], 1084 sql: t.Optional[str] = None, 1085 ) -> t.List[t.Optional[exp.Expression]]: 1086 """ 1087 Parses a list of tokens into a given Expression type. If a collection of Expression 1088 types is given instead, this method will try to parse the token list into each one 1089 of them, stopping at the first for which the parsing succeeds. 
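Editor's note: dialect parsers tune behavior by flipping the class-level flags above. A sketch of a subclass doing so; the subclass itself is illustrative, the real dialect parsers live in sqlglot.dialects:

from sqlglot.parser import Parser

class MyDialectParser(Parser):
    # Allow `SELECT COUNT(*) 'count'`-style string aliases.
    STRING_ALIASES = True
    # Treat LOG(x) as natural log, like dialects where LOG defaults to LN.
    LOG_DEFAULTS_TO_LN = True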
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
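Editor's note: parse() yields one tree per semicolon-separated statement; the chunking happens in _parse, defined just below. A usage sketch:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
trees = Parser().parse(tokens)
assert [t.sql() for t in trees] == ["SELECT 1", "SELECT 2"]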
1203 """ 1204 instance = exp_class(**kwargs) 1205 instance.add_comments(comments) if comments else self._add_comments(instance) 1206 return self.validate_expression(instance) 1207 1208 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1209 if expression and self._prev_comments: 1210 expression.add_comments(self._prev_comments) 1211 self._prev_comments = None 1212 1213 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1214 """ 1215 Validates an Expression, making sure that all its mandatory arguments are set. 1216 1217 Args: 1218 expression: The expression to validate. 1219 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1220 1221 Returns: 1222 The validated expression. 1223 """ 1224 if self.error_level != ErrorLevel.IGNORE: 1225 for error_message in expression.error_messages(args): 1226 self.raise_error(error_message) 1227 1228 return expression 1229 1230 def _find_sql(self, start: Token, end: Token) -> str: 1231 return self.sql[start.start : end.end + 1] 1232 1233 def _is_connected(self) -> bool: 1234 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1235 1236 def _advance(self, times: int = 1) -> None: 1237 self._index += times 1238 self._curr = seq_get(self._tokens, self._index) 1239 self._next = seq_get(self._tokens, self._index + 1) 1240 1241 if self._index > 0: 1242 self._prev = self._tokens[self._index - 1] 1243 self._prev_comments = self._prev.comments 1244 else: 1245 self._prev = None 1246 self._prev_comments = None 1247 1248 def _retreat(self, index: int) -> None: 1249 if index != self._index: 1250 self._advance(index - self._index) 1251 1252 def _warn_unsupported(self) -> None: 1253 if len(self._tokens) <= 1: 1254 return 1255 1256 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1257 # interested in emitting a warning for the one being currently processed. 1258 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1259 1260 logger.warning( 1261 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
1262 ) 1263 1264 def _parse_command(self) -> exp.Command: 1265 self._warn_unsupported() 1266 return self.expression( 1267 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1268 ) 1269 1270 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1271 start = self._prev 1272 exists = self._parse_exists() if allow_exists else None 1273 1274 self._match(TokenType.ON) 1275 1276 kind = self._match_set(self.CREATABLES) and self._prev 1277 if not kind: 1278 return self._parse_as_command(start) 1279 1280 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1281 this = self._parse_user_defined_function(kind=kind.token_type) 1282 elif kind.token_type == TokenType.TABLE: 1283 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1284 elif kind.token_type == TokenType.COLUMN: 1285 this = self._parse_column() 1286 else: 1287 this = self._parse_id_var() 1288 1289 self._match(TokenType.IS) 1290 1291 return self.expression( 1292 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1293 ) 1294 1295 def _parse_to_table( 1296 self, 1297 ) -> exp.ToTableProperty: 1298 table = self._parse_table_parts(schema=True) 1299 return self.expression(exp.ToTableProperty, this=table) 1300 1301 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1302 def _parse_ttl(self) -> exp.Expression: 1303 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1304 this = self._parse_bitwise() 1305 1306 if self._match_text_seq("DELETE"): 1307 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1308 if self._match_text_seq("RECOMPRESS"): 1309 return self.expression( 1310 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1311 ) 1312 if self._match_text_seq("TO", "DISK"): 1313 return self.expression( 1314 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1315 ) 1316 if self._match_text_seq("TO", "VOLUME"): 1317 return self.expression( 1318 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1319 ) 1320 1321 return this 1322 1323 expressions = self._parse_csv(_parse_ttl_action) 1324 where = self._parse_where() 1325 group = self._parse_group() 1326 1327 aggregates = None 1328 if group and self._match(TokenType.SET): 1329 aggregates = self._parse_csv(self._parse_set_item) 1330 1331 return self.expression( 1332 exp.MergeTreeTTL, 1333 expressions=expressions, 1334 where=where, 1335 group=group, 1336 aggregates=aggregates, 1337 ) 1338 1339 def _parse_statement(self) -> t.Optional[exp.Expression]: 1340 if self._curr is None: 1341 return None 1342 1343 if self._match_set(self.STATEMENT_PARSERS): 1344 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1345 1346 if self._match_set(Tokenizer.COMMANDS): 1347 return self._parse_command() 1348 1349 expression = self._parse_expression() 1350 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1351 return self._parse_query_modifiers(expression) 1352 1353 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1354 start = self._prev 1355 temporary = self._match(TokenType.TEMPORARY) 1356 materialized = self._match_text_seq("MATERIALIZED") 1357 1358 kind = self._match_set(self.CREATABLES) and self._prev.text 1359 if not kind: 1360 return self._parse_as_command(start) 1361 1362 return self.expression( 1363 exp.Drop, 1364 comments=start.comments, 1365 exists=exists or self._parse_exists(), 1366 this=self._parse_table( 1367 
schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1368 ), 1369 kind=kind, 1370 temporary=temporary, 1371 materialized=materialized, 1372 cascade=self._match_text_seq("CASCADE"), 1373 constraints=self._match_text_seq("CONSTRAINTS"), 1374 purge=self._match_text_seq("PURGE"), 1375 ) 1376 1377 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1378 return ( 1379 self._match_text_seq("IF") 1380 and (not not_ or self._match(TokenType.NOT)) 1381 and self._match(TokenType.EXISTS) 1382 ) 1383 1384 def _parse_create(self) -> exp.Create | exp.Command: 1385 # Note: this can't be None because we've matched a statement parser 1386 start = self._prev 1387 comments = self._prev_comments 1388 1389 replace = ( 1390 start.token_type == TokenType.REPLACE 1391 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1392 or self._match_pair(TokenType.OR, TokenType.ALTER) 1393 ) 1394 unique = self._match(TokenType.UNIQUE) 1395 1396 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1397 self._advance() 1398 1399 properties = None 1400 create_token = self._match_set(self.CREATABLES) and self._prev 1401 1402 if not create_token: 1403 # exp.Properties.Location.POST_CREATE 1404 properties = self._parse_properties() 1405 create_token = self._match_set(self.CREATABLES) and self._prev 1406 1407 if not properties or not create_token: 1408 return self._parse_as_command(start) 1409 1410 exists = self._parse_exists(not_=True) 1411 this = None 1412 expression: t.Optional[exp.Expression] = None 1413 indexes = None 1414 no_schema_binding = None 1415 begin = None 1416 end = None 1417 clone = None 1418 1419 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1420 nonlocal properties 1421 if properties and temp_props: 1422 properties.expressions.extend(temp_props.expressions) 1423 elif temp_props: 1424 properties = temp_props 1425 1426 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1427 this = self._parse_user_defined_function(kind=create_token.token_type) 1428 1429 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1430 extend_props(self._parse_properties()) 1431 1432 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1433 1434 if not expression: 1435 if self._match(TokenType.COMMAND): 1436 expression = self._parse_as_command(self._prev) 1437 else: 1438 begin = self._match(TokenType.BEGIN) 1439 return_ = self._match_text_seq("RETURN") 1440 1441 if self._match(TokenType.STRING, advance=False): 1442 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1443 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1444 expression = self._parse_string() 1445 extend_props(self._parse_properties()) 1446 else: 1447 expression = self._parse_statement() 1448 1449 end = self._match_text_seq("END") 1450 1451 if return_: 1452 expression = self.expression(exp.Return, this=expression) 1453 elif create_token.token_type == TokenType.INDEX: 1454 this = self._parse_index(index=self._parse_id_var()) 1455 elif create_token.token_type in self.DB_CREATABLES: 1456 table_parts = self._parse_table_parts( 1457 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1458 ) 1459 1460 # exp.Properties.Location.POST_NAME 1461 self._match(TokenType.COMMA) 1462 extend_props(self._parse_properties(before=True)) 1463 1464 this = self._parse_schema(this=table_parts) 1465 1466 # exp.Properties.Location.POST_SCHEMA 
and POST_WITH 1467 extend_props(self._parse_properties()) 1468 1469 self._match(TokenType.ALIAS) 1470 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1471 # exp.Properties.Location.POST_ALIAS 1472 extend_props(self._parse_properties()) 1473 1474 expression = self._parse_ddl_select() 1475 1476 if create_token.token_type == TokenType.TABLE: 1477 # exp.Properties.Location.POST_EXPRESSION 1478 extend_props(self._parse_properties()) 1479 1480 indexes = [] 1481 while True: 1482 index = self._parse_index() 1483 1484 # exp.Properties.Location.POST_INDEX 1485 extend_props(self._parse_properties()) 1486 1487 if not index: 1488 break 1489 else: 1490 self._match(TokenType.COMMA) 1491 indexes.append(index) 1492 elif create_token.token_type == TokenType.VIEW: 1493 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1494 no_schema_binding = True 1495 1496 shallow = self._match_text_seq("SHALLOW") 1497 1498 if self._match_texts(self.CLONE_KEYWORDS): 1499 copy = self._prev.text.lower() == "copy" 1500 clone = self.expression( 1501 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1502 ) 1503 1504 if self._curr: 1505 return self._parse_as_command(start) 1506 1507 return self.expression( 1508 exp.Create, 1509 comments=comments, 1510 this=this, 1511 kind=create_token.text.upper(), 1512 replace=replace, 1513 unique=unique, 1514 expression=expression, 1515 exists=exists, 1516 properties=properties, 1517 indexes=indexes, 1518 no_schema_binding=no_schema_binding, 1519 begin=begin, 1520 end=end, 1521 clone=clone, 1522 ) 1523 1524 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1525 # only used for teradata currently 1526 self._match(TokenType.COMMA) 1527 1528 kwargs = { 1529 "no": self._match_text_seq("NO"), 1530 "dual": self._match_text_seq("DUAL"), 1531 "before": self._match_text_seq("BEFORE"), 1532 "default": self._match_text_seq("DEFAULT"), 1533 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1534 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1535 "after": self._match_text_seq("AFTER"), 1536 "minimum": self._match_texts(("MIN", "MINIMUM")), 1537 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1538 } 1539 1540 if self._match_texts(self.PROPERTY_PARSERS): 1541 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1542 try: 1543 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1544 except TypeError: 1545 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1546 1547 return None 1548 1549 def _parse_property(self) -> t.Optional[exp.Expression]: 1550 if self._match_texts(self.PROPERTY_PARSERS): 1551 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1552 1553 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1554 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1555 1556 if self._match_text_seq("COMPOUND", "SORTKEY"): 1557 return self._parse_sortkey(compound=True) 1558 1559 if self._match_text_seq("SQL", "SECURITY"): 1560 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1561 1562 index = self._index 1563 key = self._parse_column() 1564 1565 if not self._match(TokenType.EQ): 1566 self._retreat(index) 1567 return None 1568 1569 return self.expression( 1570 exp.Property, 1571 this=key.to_dot() if isinstance(key, exp.Column) else key, 1572 value=self._parse_column() or self._parse_var(any_token=True), 1573 ) 1574 1575 def _parse_stored(self) -> exp.FileFormatProperty: 1576 self._match(TokenType.ALIAS) 1577 1578 
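# _parse_create threads optional property blocks through the locations marked
# above (POST_CREATE, POST_NAME, POST_SCHEMA, POST_ALIAS, POST_EXPRESSION)
# before the trailing DDL SELECT. A CTAS, for example, ends up as exp.Create
# with the query in its "expression" slot:
from sqlglot import exp, parse_one

ctas = parse_one("CREATE OR REPLACE TABLE t AS SELECT 1 AS x")
assert isinstance(ctas, exp.Create)
assert ctas.args["replace"] is True and ctas.args["kind"] == "TABLE"
assert isinstance(ctas.expression, exp.Select)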
input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1579 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1580 1581 return self.expression( 1582 exp.FileFormatProperty, 1583 this=( 1584 self.expression( 1585 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1586 ) 1587 if input_format or output_format 1588 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1589 ), 1590 ) 1591 1592 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1593 self._match(TokenType.EQ) 1594 self._match(TokenType.ALIAS) 1595 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1596 1597 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1598 properties = [] 1599 while True: 1600 if before: 1601 prop = self._parse_property_before() 1602 else: 1603 prop = self._parse_property() 1604 1605 if not prop: 1606 break 1607 for p in ensure_list(prop): 1608 properties.append(p) 1609 1610 if properties: 1611 return self.expression(exp.Properties, expressions=properties) 1612 1613 return None 1614 1615 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1616 return self.expression( 1617 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1618 ) 1619 1620 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1621 if self._index >= 2: 1622 pre_volatile_token = self._tokens[self._index - 2] 1623 else: 1624 pre_volatile_token = None 1625 1626 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1627 return exp.VolatileProperty() 1628 1629 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1630 1631 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1632 self._match_pair(TokenType.EQ, TokenType.ON) 1633 1634 prop = self.expression(exp.WithSystemVersioningProperty) 1635 if self._match(TokenType.L_PAREN): 1636 self._match_text_seq("HISTORY_TABLE", "=") 1637 prop.set("this", self._parse_table_parts()) 1638 1639 if self._match(TokenType.COMMA): 1640 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1641 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1642 1643 self._match_r_paren() 1644 1645 return prop 1646 1647 def _parse_with_property( 1648 self, 1649 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1650 if self._match(TokenType.L_PAREN, advance=False): 1651 return self._parse_wrapped_csv(self._parse_property) 1652 1653 if self._match_text_seq("JOURNAL"): 1654 return self._parse_withjournaltable() 1655 1656 if self._match_text_seq("DATA"): 1657 return self._parse_withdata(no=False) 1658 elif self._match_text_seq("NO", "DATA"): 1659 return self._parse_withdata(no=True) 1660 1661 if not self._next: 1662 return None 1663 1664 return self._parse_withisolatedloading() 1665 1666 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1667 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1668 self._match(TokenType.EQ) 1669 1670 user = self._parse_id_var() 1671 self._match(TokenType.PARAMETER) 1672 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1673 1674 if not user or not host: 1675 return None 1676 1677 return exp.DefinerProperty(this=f"{user}@{host}") 1678 1679 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1680 self._match(TokenType.TABLE) 1681 
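# _parse_definer reassembles MySQL's DEFINER = user@host clause (identifier,
# "@", identifier) into a single DefinerProperty string. A sketch, assuming
# the MySQL dialect and an unquoted user/host:
from sqlglot import exp, parse_one

view = parse_one("CREATE DEFINER=admin@localhost VIEW v AS SELECT 1", read="mysql")
definer = view.find(exp.DefinerProperty)
assert definer is not None  # definer.this folds both parts into "admin@localhost"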
self._match(TokenType.EQ) 1682 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1683 1684 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1685 return self.expression(exp.LogProperty, no=no) 1686 1687 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1688 return self.expression(exp.JournalProperty, **kwargs) 1689 1690 def _parse_checksum(self) -> exp.ChecksumProperty: 1691 self._match(TokenType.EQ) 1692 1693 on = None 1694 if self._match(TokenType.ON): 1695 on = True 1696 elif self._match_text_seq("OFF"): 1697 on = False 1698 1699 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1700 1701 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1702 return self.expression( 1703 exp.Cluster, 1704 expressions=( 1705 self._parse_wrapped_csv(self._parse_ordered) 1706 if wrapped 1707 else self._parse_csv(self._parse_ordered) 1708 ), 1709 ) 1710 1711 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1712 self._match_text_seq("BY") 1713 1714 self._match_l_paren() 1715 expressions = self._parse_csv(self._parse_column) 1716 self._match_r_paren() 1717 1718 if self._match_text_seq("SORTED", "BY"): 1719 self._match_l_paren() 1720 sorted_by = self._parse_csv(self._parse_ordered) 1721 self._match_r_paren() 1722 else: 1723 sorted_by = None 1724 1725 self._match(TokenType.INTO) 1726 buckets = self._parse_number() 1727 self._match_text_seq("BUCKETS") 1728 1729 return self.expression( 1730 exp.ClusteredByProperty, 1731 expressions=expressions, 1732 sorted_by=sorted_by, 1733 buckets=buckets, 1734 ) 1735 1736 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1737 if not self._match_text_seq("GRANTS"): 1738 self._retreat(self._index - 1) 1739 return None 1740 1741 return self.expression(exp.CopyGrantsProperty) 1742 1743 def _parse_freespace(self) -> exp.FreespaceProperty: 1744 self._match(TokenType.EQ) 1745 return self.expression( 1746 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1747 ) 1748 1749 def _parse_mergeblockratio( 1750 self, no: bool = False, default: bool = False 1751 ) -> exp.MergeBlockRatioProperty: 1752 if self._match(TokenType.EQ): 1753 return self.expression( 1754 exp.MergeBlockRatioProperty, 1755 this=self._parse_number(), 1756 percent=self._match(TokenType.PERCENT), 1757 ) 1758 1759 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1760 1761 def _parse_datablocksize( 1762 self, 1763 default: t.Optional[bool] = None, 1764 minimum: t.Optional[bool] = None, 1765 maximum: t.Optional[bool] = None, 1766 ) -> exp.DataBlocksizeProperty: 1767 self._match(TokenType.EQ) 1768 size = self._parse_number() 1769 1770 units = None 1771 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1772 units = self._prev.text 1773 1774 return self.expression( 1775 exp.DataBlocksizeProperty, 1776 size=size, 1777 units=units, 1778 default=default, 1779 minimum=minimum, 1780 maximum=maximum, 1781 ) 1782 1783 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1784 self._match(TokenType.EQ) 1785 always = self._match_text_seq("ALWAYS") 1786 manual = self._match_text_seq("MANUAL") 1787 never = self._match_text_seq("NEVER") 1788 default = self._match_text_seq("DEFAULT") 1789 1790 autotemp = None 1791 if self._match_text_seq("AUTOTEMP"): 1792 autotemp = self._parse_schema() 1793 1794 return self.expression( 1795 exp.BlockCompressionProperty, 1796 always=always, 1797 manual=manual, 1798 never=never, 1799 
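# (The surrounding parsers back Teradata-style CREATE TABLE options; a
#  statement like the following, assuming the Teradata dialect, exercises
#  _parse_checksum and _parse_blockcompression via _parse_property_before:
#      CREATE TABLE t, CHECKSUM = ON, BLOCKCOMPRESSION = MANUAL (x INT))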
default=default, 1800 autotemp=autotemp, 1801 ) 1802 1803 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1804 no = self._match_text_seq("NO") 1805 concurrent = self._match_text_seq("CONCURRENT") 1806 self._match_text_seq("ISOLATED", "LOADING") 1807 for_all = self._match_text_seq("FOR", "ALL") 1808 for_insert = self._match_text_seq("FOR", "INSERT") 1809 for_none = self._match_text_seq("FOR", "NONE") 1810 return self.expression( 1811 exp.IsolatedLoadingProperty, 1812 no=no, 1813 concurrent=concurrent, 1814 for_all=for_all, 1815 for_insert=for_insert, 1816 for_none=for_none, 1817 ) 1818 1819 def _parse_locking(self) -> exp.LockingProperty: 1820 if self._match(TokenType.TABLE): 1821 kind = "TABLE" 1822 elif self._match(TokenType.VIEW): 1823 kind = "VIEW" 1824 elif self._match(TokenType.ROW): 1825 kind = "ROW" 1826 elif self._match_text_seq("DATABASE"): 1827 kind = "DATABASE" 1828 else: 1829 kind = None 1830 1831 if kind in ("DATABASE", "TABLE", "VIEW"): 1832 this = self._parse_table_parts() 1833 else: 1834 this = None 1835 1836 if self._match(TokenType.FOR): 1837 for_or_in = "FOR" 1838 elif self._match(TokenType.IN): 1839 for_or_in = "IN" 1840 else: 1841 for_or_in = None 1842 1843 if self._match_text_seq("ACCESS"): 1844 lock_type = "ACCESS" 1845 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1846 lock_type = "EXCLUSIVE" 1847 elif self._match_text_seq("SHARE"): 1848 lock_type = "SHARE" 1849 elif self._match_text_seq("READ"): 1850 lock_type = "READ" 1851 elif self._match_text_seq("WRITE"): 1852 lock_type = "WRITE" 1853 elif self._match_text_seq("CHECKSUM"): 1854 lock_type = "CHECKSUM" 1855 else: 1856 lock_type = None 1857 1858 override = self._match_text_seq("OVERRIDE") 1859 1860 return self.expression( 1861 exp.LockingProperty, 1862 this=this, 1863 kind=kind, 1864 for_or_in=for_or_in, 1865 lock_type=lock_type, 1866 override=override, 1867 ) 1868 1869 def _parse_partition_by(self) -> t.List[exp.Expression]: 1870 if self._match(TokenType.PARTITION_BY): 1871 return self._parse_csv(self._parse_conjunction) 1872 return [] 1873 1874 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1875 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1876 if self._match_text_seq("MINVALUE"): 1877 return exp.var("MINVALUE") 1878 if self._match_text_seq("MAXVALUE"): 1879 return exp.var("MAXVALUE") 1880 return self._parse_bitwise() 1881 1882 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1883 expression = None 1884 from_expressions = None 1885 to_expressions = None 1886 1887 if self._match(TokenType.IN): 1888 this = self._parse_wrapped_csv(self._parse_bitwise) 1889 elif self._match(TokenType.FROM): 1890 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1891 self._match_text_seq("TO") 1892 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1893 elif self._match_text_seq("WITH", "(", "MODULUS"): 1894 this = self._parse_number() 1895 self._match_text_seq(",", "REMAINDER") 1896 expression = self._parse_number() 1897 self._match_r_paren() 1898 else: 1899 self.raise_error("Failed to parse partition bound spec.") 1900 1901 return self.expression( 1902 exp.PartitionBoundSpec, 1903 this=this, 1904 expression=expression, 1905 from_expressions=from_expressions, 1906 to_expressions=to_expressions, 1907 ) 1908 1909 # https://www.postgresql.org/docs/current/sql-createtable.html 1910 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1911 if not self._match_text_seq("OF"): 1912 
self._retreat(self._index - 1) 1913 return None 1914 1915 this = self._parse_table(schema=True) 1916 1917 if self._match(TokenType.DEFAULT): 1918 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1919 elif self._match_text_seq("FOR", "VALUES"): 1920 expression = self._parse_partition_bound_spec() 1921 else: 1922 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1923 1924 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1925 1926 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1927 self._match(TokenType.EQ) 1928 return self.expression( 1929 exp.PartitionedByProperty, 1930 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1931 ) 1932 1933 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1934 if self._match_text_seq("AND", "STATISTICS"): 1935 statistics = True 1936 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1937 statistics = False 1938 else: 1939 statistics = None 1940 1941 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1942 1943 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1944 if self._match_text_seq("SQL"): 1945 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1946 return None 1947 1948 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1949 if self._match_text_seq("SQL", "DATA"): 1950 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1951 return None 1952 1953 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1954 if self._match_text_seq("PRIMARY", "INDEX"): 1955 return exp.NoPrimaryIndexProperty() 1956 if self._match_text_seq("SQL"): 1957 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1958 return None 1959 1960 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1961 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1962 return exp.OnCommitProperty() 1963 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1964 return exp.OnCommitProperty(delete=True) 1965 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1966 1967 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1968 if self._match_text_seq("SQL", "DATA"): 1969 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1970 return None 1971 1972 def _parse_distkey(self) -> exp.DistKeyProperty: 1973 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1974 1975 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1976 table = self._parse_table(schema=True) 1977 1978 options = [] 1979 while self._match_texts(("INCLUDING", "EXCLUDING")): 1980 this = self._prev.text.upper() 1981 1982 id_var = self._parse_id_var() 1983 if not id_var: 1984 return None 1985 1986 options.append( 1987 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1988 ) 1989 1990 return self.expression(exp.LikeProperty, this=table, expressions=options) 1991 1992 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1993 return self.expression( 1994 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1995 ) 1996 1997 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1998 self._match(TokenType.EQ) 1999 return self.expression( 2000 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2001 ) 2002 2003 def _parse_remote_with_connection(self) 
-> exp.RemoteWithConnectionModelProperty: 2004 self._match_text_seq("WITH", "CONNECTION") 2005 return self.expression( 2006 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2007 ) 2008 2009 def _parse_returns(self) -> exp.ReturnsProperty: 2010 value: t.Optional[exp.Expression] 2011 is_table = self._match(TokenType.TABLE) 2012 2013 if is_table: 2014 if self._match(TokenType.LT): 2015 value = self.expression( 2016 exp.Schema, 2017 this="TABLE", 2018 expressions=self._parse_csv(self._parse_struct_types), 2019 ) 2020 if not self._match(TokenType.GT): 2021 self.raise_error("Expecting >") 2022 else: 2023 value = self._parse_schema(exp.var("TABLE")) 2024 else: 2025 value = self._parse_types() 2026 2027 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2028 2029 def _parse_describe(self) -> exp.Describe: 2030 kind = self._match_set(self.CREATABLES) and self._prev.text 2031 extended = self._match_text_seq("EXTENDED") 2032 this = self._parse_table(schema=True) 2033 properties = self._parse_properties() 2034 expressions = properties.expressions if properties else None 2035 return self.expression( 2036 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2037 ) 2038 2039 def _parse_insert(self) -> exp.Insert: 2040 comments = ensure_list(self._prev_comments) 2041 overwrite = self._match(TokenType.OVERWRITE) 2042 ignore = self._match(TokenType.IGNORE) 2043 local = self._match_text_seq("LOCAL") 2044 alternative = None 2045 2046 if self._match_text_seq("DIRECTORY"): 2047 this: t.Optional[exp.Expression] = self.expression( 2048 exp.Directory, 2049 this=self._parse_var_or_string(), 2050 local=local, 2051 row_format=self._parse_row_format(match_row=True), 2052 ) 2053 else: 2054 if self._match(TokenType.OR): 2055 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2056 2057 self._match(TokenType.INTO) 2058 comments += ensure_list(self._prev_comments) 2059 self._match(TokenType.TABLE) 2060 this = self._parse_table(schema=True) 2061 2062 returning = self._parse_returning() 2063 2064 return self.expression( 2065 exp.Insert, 2066 comments=comments, 2067 this=this, 2068 by_name=self._match_text_seq("BY", "NAME"), 2069 exists=self._parse_exists(), 2070 partition=self._parse_partition(), 2071 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2072 and self._parse_conjunction(), 2073 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2074 conflict=self._parse_on_conflict(), 2075 returning=returning or self._parse_returning(), 2076 overwrite=overwrite, 2077 alternative=alternative, 2078 ignore=ignore, 2079 ) 2080 2081 def _parse_kill(self) -> exp.Kill: 2082 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2083 2084 return self.expression( 2085 exp.Kill, 2086 this=self._parse_primary(), 2087 kind=kind, 2088 ) 2089 2090 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2091 conflict = self._match_text_seq("ON", "CONFLICT") 2092 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2093 2094 if not conflict and not duplicate: 2095 return None 2096 2097 nothing = None 2098 expressions = None 2099 key = None 2100 constraint = None 2101 2102 if conflict: 2103 if self._match_text_seq("ON", "CONSTRAINT"): 2104 constraint = self._parse_id_var() 2105 else: 2106 key = self._parse_csv(self._parse_value) 2107 2108 self._match_text_seq("DO") 2109 if self._match_text_seq("NOTHING"): 2110 nothing = True 2111 else: 2112 self._match(TokenType.UPDATE) 2113 
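# _parse_on_conflict covers both Postgres-style ON CONFLICT and MySQL-style
# ON DUPLICATE KEY, recording the conflict key, DO NOTHING, or the DO UPDATE
# SET assignments parsed right below. A sketch, assuming the default dialect:
from sqlglot import exp, parse_one

ins = parse_one("INSERT INTO t (x) VALUES (1) ON CONFLICT (x) DO NOTHING")
conflict = ins.args["conflict"]
assert isinstance(conflict, exp.OnConflict)
assert conflict.args["nothing"] is True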
self._match(TokenType.SET) 2114 expressions = self._parse_csv(self._parse_equality) 2115 2116 return self.expression( 2117 exp.OnConflict, 2118 duplicate=duplicate, 2119 expressions=expressions, 2120 nothing=nothing, 2121 key=key, 2122 constraint=constraint, 2123 ) 2124 2125 def _parse_returning(self) -> t.Optional[exp.Returning]: 2126 if not self._match(TokenType.RETURNING): 2127 return None 2128 return self.expression( 2129 exp.Returning, 2130 expressions=self._parse_csv(self._parse_expression), 2131 into=self._match(TokenType.INTO) and self._parse_table_part(), 2132 ) 2133 2134 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2135 if not self._match(TokenType.FORMAT): 2136 return None 2137 return self._parse_row_format() 2138 2139 def _parse_row_format( 2140 self, match_row: bool = False 2141 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2142 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2143 return None 2144 2145 if self._match_text_seq("SERDE"): 2146 this = self._parse_string() 2147 2148 serde_properties = None 2149 if self._match(TokenType.SERDE_PROPERTIES): 2150 serde_properties = self.expression( 2151 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2152 ) 2153 2154 return self.expression( 2155 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2156 ) 2157 2158 self._match_text_seq("DELIMITED") 2159 2160 kwargs = {} 2161 2162 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2163 kwargs["fields"] = self._parse_string() 2164 if self._match_text_seq("ESCAPED", "BY"): 2165 kwargs["escaped"] = self._parse_string() 2166 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2167 kwargs["collection_items"] = self._parse_string() 2168 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2169 kwargs["map_keys"] = self._parse_string() 2170 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2171 kwargs["lines"] = self._parse_string() 2172 if self._match_text_seq("NULL", "DEFINED", "AS"): 2173 kwargs["null"] = self._parse_string() 2174 2175 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2176 2177 def _parse_load(self) -> exp.LoadData | exp.Command: 2178 if self._match_text_seq("DATA"): 2179 local = self._match_text_seq("LOCAL") 2180 self._match_text_seq("INPATH") 2181 inpath = self._parse_string() 2182 overwrite = self._match(TokenType.OVERWRITE) 2183 self._match_pair(TokenType.INTO, TokenType.TABLE) 2184 2185 return self.expression( 2186 exp.LoadData, 2187 this=self._parse_table(schema=True), 2188 local=local, 2189 overwrite=overwrite, 2190 inpath=inpath, 2191 partition=self._parse_partition(), 2192 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2193 serde=self._match_text_seq("SERDE") and self._parse_string(), 2194 ) 2195 return self._parse_as_command(self._prev) 2196 2197 def _parse_delete(self) -> exp.Delete: 2198 # This handles MySQL's "Multiple-Table Syntax" 2199 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2200 tables = None 2201 comments = self._prev_comments 2202 if not self._match(TokenType.FROM, advance=False): 2203 tables = self._parse_csv(self._parse_table) or None 2204 2205 returning = self._parse_returning() 2206 2207 return self.expression( 2208 exp.Delete, 2209 comments=comments, 2210 tables=tables, 2211 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2212 using=self._match(TokenType.USING) and 
self._parse_table(joins=True), 2213 where=self._parse_where(), 2214 returning=returning or self._parse_returning(), 2215 limit=self._parse_limit(), 2216 ) 2217 2218 def _parse_update(self) -> exp.Update: 2219 comments = self._prev_comments 2220 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2221 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2222 returning = self._parse_returning() 2223 return self.expression( 2224 exp.Update, 2225 comments=comments, 2226 **{ # type: ignore 2227 "this": this, 2228 "expressions": expressions, 2229 "from": self._parse_from(joins=True), 2230 "where": self._parse_where(), 2231 "returning": returning or self._parse_returning(), 2232 "order": self._parse_order(), 2233 "limit": self._parse_limit(), 2234 }, 2235 ) 2236 2237 def _parse_uncache(self) -> exp.Uncache: 2238 if not self._match(TokenType.TABLE): 2239 self.raise_error("Expecting TABLE after UNCACHE") 2240 2241 return self.expression( 2242 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2243 ) 2244 2245 def _parse_cache(self) -> exp.Cache: 2246 lazy = self._match_text_seq("LAZY") 2247 self._match(TokenType.TABLE) 2248 table = self._parse_table(schema=True) 2249 2250 options = [] 2251 if self._match_text_seq("OPTIONS"): 2252 self._match_l_paren() 2253 k = self._parse_string() 2254 self._match(TokenType.EQ) 2255 v = self._parse_string() 2256 options = [k, v] 2257 self._match_r_paren() 2258 2259 self._match(TokenType.ALIAS) 2260 return self.expression( 2261 exp.Cache, 2262 this=table, 2263 lazy=lazy, 2264 options=options, 2265 expression=self._parse_select(nested=True), 2266 ) 2267 2268 def _parse_partition(self) -> t.Optional[exp.Partition]: 2269 if not self._match(TokenType.PARTITION): 2270 return None 2271 2272 return self.expression( 2273 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2274 ) 2275 2276 def _parse_value(self) -> exp.Tuple: 2277 if self._match(TokenType.L_PAREN): 2278 expressions = self._parse_csv(self._parse_expression) 2279 self._match_r_paren() 2280 return self.expression(exp.Tuple, expressions=expressions) 2281 2282 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
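# _parse_update reuses the shared clause parsers, so the resulting exp.Update
# exposes the familiar slots ("this", "expressions", "from", "where", ...):
from sqlglot import exp, parse_one

upd = parse_one("UPDATE t SET x = 1 WHERE y > 0")
assert isinstance(upd, exp.Update)
assert isinstance(upd.args["where"], exp.Where)
assert upd.sql() == "UPDATE t SET x = 1 WHERE y > 0"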
2283 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2284 2285 def _parse_projections(self) -> t.List[exp.Expression]: 2286 return self._parse_expressions() 2287 2288 def _parse_select( 2289 self, 2290 nested: bool = False, 2291 table: bool = False, 2292 parse_subquery_alias: bool = True, 2293 parse_set_operation: bool = True, 2294 ) -> t.Optional[exp.Expression]: 2295 cte = self._parse_with() 2296 2297 if cte: 2298 this = self._parse_statement() 2299 2300 if not this: 2301 self.raise_error("Failed to parse any statement following CTE") 2302 return cte 2303 2304 if "with" in this.arg_types: 2305 this.set("with", cte) 2306 else: 2307 self.raise_error(f"{this.key} does not support CTE") 2308 this = cte 2309 2310 return this 2311 2312 # duckdb supports leading with FROM x 2313 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2314 2315 if self._match(TokenType.SELECT): 2316 comments = self._prev_comments 2317 2318 hint = self._parse_hint() 2319 all_ = self._match(TokenType.ALL) 2320 distinct = self._match_set(self.DISTINCT_TOKENS) 2321 2322 kind = ( 2323 self._match(TokenType.ALIAS) 2324 and self._match_texts(("STRUCT", "VALUE")) 2325 and self._prev.text.upper() 2326 ) 2327 2328 if distinct: 2329 distinct = self.expression( 2330 exp.Distinct, 2331 on=self._parse_value() if self._match(TokenType.ON) else None, 2332 ) 2333 2334 if all_ and distinct: 2335 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2336 2337 limit = self._parse_limit(top=True) 2338 projections = self._parse_projections() 2339 2340 this = self.expression( 2341 exp.Select, 2342 kind=kind, 2343 hint=hint, 2344 distinct=distinct, 2345 expressions=projections, 2346 limit=limit, 2347 ) 2348 this.comments = comments 2349 2350 into = self._parse_into() 2351 if into: 2352 this.set("into", into) 2353 2354 if not from_: 2355 from_ = self._parse_from() 2356 2357 if from_: 2358 this.set("from", from_) 2359 2360 this = self._parse_query_modifiers(this) 2361 elif (table or nested) and self._match(TokenType.L_PAREN): 2362 if self._match(TokenType.PIVOT): 2363 this = self._parse_simplified_pivot() 2364 elif self._match(TokenType.FROM): 2365 this = exp.select("*").from_( 2366 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2367 ) 2368 else: 2369 this = ( 2370 self._parse_table() 2371 if table 2372 else self._parse_select(nested=True, parse_set_operation=False) 2373 ) 2374 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2375 2376 self._match_r_paren() 2377 2378 # We return early here so that the UNION isn't attached to the subquery by the 2379 # following call to _parse_set_operations, but instead becomes the parent node 2380 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2381 elif self._match(TokenType.VALUES, advance=False): 2382 this = self._parse_derived_table_values() 2383 elif from_: 2384 this = exp.select("*").from_(from_.this, copy=False) 2385 else: 2386 this = None 2387 2388 if parse_set_operation: 2389 return self._parse_set_operations(this) 2390 return this 2391 2392 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2393 if not skip_with_token and not self._match(TokenType.WITH): 2394 return None 2395 2396 comments = self._prev_comments 2397 recursive = self._match(TokenType.RECURSIVE) 2398 2399 expressions = [] 2400 while True: 2401 expressions.append(self._parse_cte()) 2402 2403 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2404 break 2405 else: 
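# (A note on the leading-FROM branch of _parse_select above: duckdb allows a
#  query to start with FROM, which becomes SELECT * FROM ..., e.g., assuming
#  the duckdb dialect:
#      parse_one("FROM tbl", read="duckdb").sql() == "SELECT * FROM tbl")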
2406 self._match(TokenType.WITH) 2407 2408 return self.expression( 2409 exp.With, comments=comments, expressions=expressions, recursive=recursive 2410 ) 2411 2412 def _parse_cte(self) -> exp.CTE: 2413 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2414 if not alias or not alias.this: 2415 self.raise_error("Expected CTE to have alias") 2416 2417 self._match(TokenType.ALIAS) 2418 return self.expression( 2419 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2420 ) 2421 2422 def _parse_table_alias( 2423 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2424 ) -> t.Optional[exp.TableAlias]: 2425 any_token = self._match(TokenType.ALIAS) 2426 alias = ( 2427 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2428 or self._parse_string_as_identifier() 2429 ) 2430 2431 index = self._index 2432 if self._match(TokenType.L_PAREN): 2433 columns = self._parse_csv(self._parse_function_parameter) 2434 self._match_r_paren() if columns else self._retreat(index) 2435 else: 2436 columns = None 2437 2438 if not alias and not columns: 2439 return None 2440 2441 return self.expression(exp.TableAlias, this=alias, columns=columns) 2442 2443 def _parse_subquery( 2444 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2445 ) -> t.Optional[exp.Subquery]: 2446 if not this: 2447 return None 2448 2449 return self.expression( 2450 exp.Subquery, 2451 this=this, 2452 pivots=self._parse_pivots(), 2453 alias=self._parse_table_alias() if parse_alias else None, 2454 ) 2455 2456 def _parse_query_modifiers( 2457 self, this: t.Optional[exp.Expression] 2458 ) -> t.Optional[exp.Expression]: 2459 if isinstance(this, self.MODIFIABLES): 2460 for join in iter(self._parse_join, None): 2461 this.append("joins", join) 2462 for lateral in iter(self._parse_lateral, None): 2463 this.append("laterals", lateral) 2464 2465 while True: 2466 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2467 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2468 key, expression = parser(self) 2469 2470 if expression: 2471 this.set(key, expression) 2472 if key == "limit": 2473 offset = expression.args.pop("offset", None) 2474 2475 if offset: 2476 offset = exp.Offset(expression=offset) 2477 this.set("offset", offset) 2478 2479 limit_by_expressions = expression.expressions 2480 expression.set("expressions", None) 2481 offset.set("expressions", limit_by_expressions) 2482 continue 2483 break 2484 return this 2485 2486 def _parse_hint(self) -> t.Optional[exp.Hint]: 2487 if self._match(TokenType.HINT): 2488 hints = [] 2489 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2490 hints.extend(hint) 2491 2492 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2493 self.raise_error("Expected */ after HINT") 2494 2495 return self.expression(exp.Hint, expressions=hints) 2496 2497 return None 2498 2499 def _parse_into(self) -> t.Optional[exp.Into]: 2500 if not self._match(TokenType.INTO): 2501 return None 2502 2503 temp = self._match(TokenType.TEMPORARY) 2504 unlogged = self._match_text_seq("UNLOGGED") 2505 self._match(TokenType.TABLE) 2506 2507 return self.expression( 2508 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2509 ) 2510 2511 def _parse_from( 2512 self, joins: bool = False, skip_from_token: bool = False 2513 ) -> t.Optional[exp.From]: 2514 if not skip_from_token and not self._match(TokenType.FROM): 2515 return None 2516 2517 return self.expression( 2518 exp.From, comments=self._prev_comments, 
this=self._parse_table(joins=joins) 2519 ) 2520 2521 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2522 if not self._match(TokenType.MATCH_RECOGNIZE): 2523 return None 2524 2525 self._match_l_paren() 2526 2527 partition = self._parse_partition_by() 2528 order = self._parse_order() 2529 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2530 2531 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2532 rows = exp.var("ONE ROW PER MATCH") 2533 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2534 text = "ALL ROWS PER MATCH" 2535 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2536 text += " SHOW EMPTY MATCHES" 2537 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2538 text += " OMIT EMPTY MATCHES" 2539 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2540 text += " WITH UNMATCHED ROWS" 2541 rows = exp.var(text) 2542 else: 2543 rows = None 2544 2545 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2546 text = "AFTER MATCH SKIP" 2547 if self._match_text_seq("PAST", "LAST", "ROW"): 2548 text += " PAST LAST ROW" 2549 elif self._match_text_seq("TO", "NEXT", "ROW"): 2550 text += " TO NEXT ROW" 2551 elif self._match_text_seq("TO", "FIRST"): 2552 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2553 elif self._match_text_seq("TO", "LAST"): 2554 text += f" TO LAST {self._advance_any().text}" # type: ignore 2555 after = exp.var(text) 2556 else: 2557 after = None 2558 2559 if self._match_text_seq("PATTERN"): 2560 self._match_l_paren() 2561 2562 if not self._curr: 2563 self.raise_error("Expecting )", self._curr) 2564 2565 paren = 1 2566 start = self._curr 2567 2568 while self._curr and paren > 0: 2569 if self._curr.token_type == TokenType.L_PAREN: 2570 paren += 1 2571 if self._curr.token_type == TokenType.R_PAREN: 2572 paren -= 1 2573 2574 end = self._prev 2575 self._advance() 2576 2577 if paren > 0: 2578 self.raise_error("Expecting )", self._curr) 2579 2580 pattern = exp.var(self._find_sql(start, end)) 2581 else: 2582 pattern = None 2583 2584 define = ( 2585 self._parse_csv(self._parse_name_as_expression) 2586 if self._match_text_seq("DEFINE") 2587 else None 2588 ) 2589 2590 self._match_r_paren() 2591 2592 return self.expression( 2593 exp.MatchRecognize, 2594 partition_by=partition, 2595 order=order, 2596 measures=measures, 2597 rows=rows, 2598 after=after, 2599 pattern=pattern, 2600 define=define, 2601 alias=self._parse_table_alias(), 2602 ) 2603 2604 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2605 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2606 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2607 cross_apply = False 2608 2609 if cross_apply is not None: 2610 this = self._parse_select(table=True) 2611 view = None 2612 outer = None 2613 elif self._match(TokenType.LATERAL): 2614 this = self._parse_select(table=True) 2615 view = self._match(TokenType.VIEW) 2616 outer = self._match(TokenType.OUTER) 2617 else: 2618 return None 2619 2620 if not this: 2621 this = ( 2622 self._parse_unnest() 2623 or self._parse_function() 2624 or self._parse_id_var(any_token=False) 2625 ) 2626 2627 while self._match(TokenType.DOT): 2628 this = exp.Dot( 2629 this=this, 2630 expression=self._parse_function() or self._parse_id_var(any_token=False), 2631 ) 2632 2633 if view: 2634 table = self._parse_id_var(any_token=False) 2635 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2636 table_alias: t.Optional[exp.TableAlias] = 
self.expression( 2637 exp.TableAlias, this=table, columns=columns 2638 ) 2639 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2640 # We move the alias from the lateral's child node to the lateral itself 2641 table_alias = this.args["alias"].pop() 2642 else: 2643 table_alias = self._parse_table_alias() 2644 2645 return self.expression( 2646 exp.Lateral, 2647 this=this, 2648 view=view, 2649 outer=outer, 2650 alias=table_alias, 2651 cross_apply=cross_apply, 2652 ) 2653 2654 def _parse_join_parts( 2655 self, 2656 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2657 return ( 2658 self._match_set(self.JOIN_METHODS) and self._prev, 2659 self._match_set(self.JOIN_SIDES) and self._prev, 2660 self._match_set(self.JOIN_KINDS) and self._prev, 2661 ) 2662 2663 def _parse_join( 2664 self, skip_join_token: bool = False, parse_bracket: bool = False 2665 ) -> t.Optional[exp.Join]: 2666 if self._match(TokenType.COMMA): 2667 return self.expression(exp.Join, this=self._parse_table()) 2668 2669 index = self._index 2670 method, side, kind = self._parse_join_parts() 2671 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2672 join = self._match(TokenType.JOIN) 2673 2674 if not skip_join_token and not join: 2675 self._retreat(index) 2676 kind = None 2677 method = None 2678 side = None 2679 2680 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2681 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2682 2683 if not skip_join_token and not join and not outer_apply and not cross_apply: 2684 return None 2685 2686 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2687 2688 if method: 2689 kwargs["method"] = method.text 2690 if side: 2691 kwargs["side"] = side.text 2692 if kind: 2693 kwargs["kind"] = kind.text 2694 if hint: 2695 kwargs["hint"] = hint 2696 2697 if self._match(TokenType.ON): 2698 kwargs["on"] = self._parse_conjunction() 2699 elif self._match(TokenType.USING): 2700 kwargs["using"] = self._parse_wrapped_id_vars() 2701 elif not (kind and kind.token_type == TokenType.CROSS): 2702 index = self._index 2703 join = self._parse_join() 2704 2705 if join and self._match(TokenType.ON): 2706 kwargs["on"] = self._parse_conjunction() 2707 elif join and self._match(TokenType.USING): 2708 kwargs["using"] = self._parse_wrapped_id_vars() 2709 else: 2710 join = None 2711 self._retreat(index) 2712 2713 kwargs["this"].set("joins", [join] if join else None) 2714 2715 comments = [c for token in (method, side, kind) if token for c in token.comments] 2716 return self.expression(exp.Join, comments=comments, **kwargs) 2717 2718 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2719 this = self._parse_conjunction() 2720 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2721 return this 2722 2723 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2724 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2725 2726 return this 2727 2728 def _parse_index( 2729 self, 2730 index: t.Optional[exp.Expression] = None, 2731 ) -> t.Optional[exp.Index]: 2732 if index: 2733 unique = None 2734 primary = None 2735 amp = None 2736 2737 self._match(TokenType.ON) 2738 self._match(TokenType.TABLE) # hive 2739 table = self._parse_table_parts(schema=True) 2740 else: 2741 unique = self._match(TokenType.UNIQUE) 2742 primary = self._match_text_seq("PRIMARY") 2743 amp = self._match_text_seq("AMP") 2744 2745 if not self._match(TokenType.INDEX): 2746 
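# (Join parsing note: _parse_join_parts above peels off the optional method,
#  side and kind tokens, so parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
#  yields an exp.Join with side="LEFT" and the ON condition in args["on"].)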
return None 2747 2748 index = self._parse_id_var() 2749 table = None 2750 2751 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2752 2753 if self._match(TokenType.L_PAREN, advance=False): 2754 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2755 else: 2756 columns = None 2757 2758 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2759 2760 return self.expression( 2761 exp.Index, 2762 this=index, 2763 table=table, 2764 using=using, 2765 columns=columns, 2766 unique=unique, 2767 primary=primary, 2768 amp=amp, 2769 include=include, 2770 partition_by=self._parse_partition_by(), 2771 where=self._parse_where(), 2772 ) 2773 2774 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2775 hints: t.List[exp.Expression] = [] 2776 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2777 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2778 hints.append( 2779 self.expression( 2780 exp.WithTableHint, 2781 expressions=self._parse_csv( 2782 lambda: self._parse_function() or self._parse_var(any_token=True) 2783 ), 2784 ) 2785 ) 2786 self._match_r_paren() 2787 else: 2788 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2789 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2790 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2791 2792 self._match_texts(("INDEX", "KEY")) 2793 if self._match(TokenType.FOR): 2794 hint.set("target", self._advance_any() and self._prev.text.upper()) 2795 2796 hint.set("expressions", self._parse_wrapped_id_vars()) 2797 hints.append(hint) 2798 2799 return hints or None 2800 2801 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2802 return ( 2803 (not schema and self._parse_function(optional_parens=False)) 2804 or self._parse_id_var(any_token=False) 2805 or self._parse_string_as_identifier() 2806 or self._parse_placeholder() 2807 ) 2808 2809 def _parse_table_parts( 2810 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 2811 ) -> exp.Table: 2812 catalog = None 2813 db = None 2814 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2815 2816 while self._match(TokenType.DOT): 2817 if catalog: 2818 # This allows nesting the table in arbitrarily many dot expressions if needed 2819 table = self.expression( 2820 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2821 ) 2822 else: 2823 catalog = db 2824 db = table 2825 # "" used for tsql FROM a..b case 2826 table = self._parse_table_part(schema=schema) or "" 2827 2828 if ( 2829 wildcard 2830 and self._is_connected() 2831 and (isinstance(table, exp.Identifier) or not table) 2832 and self._match(TokenType.STAR) 2833 ): 2834 if isinstance(table, exp.Identifier): 2835 table.args["this"] += "*" 2836 else: 2837 table = exp.Identifier(this="*") 2838 2839 if is_db_reference: 2840 catalog = db 2841 db = table 2842 table = None 2843 2844 if not table and not is_db_reference: 2845 self.raise_error(f"Expected table name but got {self._curr}") 2846 if not db and is_db_reference: 2847 self.raise_error(f"Expected database name but got {self._curr}") 2848 2849 return self.expression( 2850 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2851 ) 2852 2853 def _parse_table( 2854 self, 2855 schema: bool = False, 2856 joins: bool = False, 2857 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2858 parse_bracket: bool = False, 
2859 is_db_reference: bool = False, 2860 ) -> t.Optional[exp.Expression]: 2861 lateral = self._parse_lateral() 2862 if lateral: 2863 return lateral 2864 2865 unnest = self._parse_unnest() 2866 if unnest: 2867 return unnest 2868 2869 values = self._parse_derived_table_values() 2870 if values: 2871 return values 2872 2873 subquery = self._parse_select(table=True) 2874 if subquery: 2875 if not subquery.args.get("pivots"): 2876 subquery.set("pivots", self._parse_pivots()) 2877 return subquery 2878 2879 bracket = parse_bracket and self._parse_bracket(None) 2880 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2881 this = t.cast( 2882 exp.Expression, 2883 bracket 2884 or self._parse_bracket( 2885 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 2886 ), 2887 ) 2888 2889 if schema: 2890 return self._parse_schema(this=this) 2891 2892 version = self._parse_version() 2893 2894 if version: 2895 this.set("version", version) 2896 2897 if self.dialect.ALIAS_POST_TABLESAMPLE: 2898 table_sample = self._parse_table_sample() 2899 2900 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2901 if alias: 2902 this.set("alias", alias) 2903 2904 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2905 return self.expression( 2906 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2907 ) 2908 2909 this.set("hints", self._parse_table_hints()) 2910 2911 if not this.args.get("pivots"): 2912 this.set("pivots", self._parse_pivots()) 2913 2914 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2915 table_sample = self._parse_table_sample() 2916 2917 if table_sample: 2918 table_sample.set("this", this) 2919 this = table_sample 2920 2921 if joins: 2922 for join in iter(self._parse_join, None): 2923 this.append("joins", join) 2924 2925 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2926 this.set("ordinality", True) 2927 this.set("alias", self._parse_table_alias()) 2928 2929 return this 2930 2931 def _parse_version(self) -> t.Optional[exp.Version]: 2932 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2933 this = "TIMESTAMP" 2934 elif self._match(TokenType.VERSION_SNAPSHOT): 2935 this = "VERSION" 2936 else: 2937 return None 2938 2939 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2940 kind = self._prev.text.upper() 2941 start = self._parse_bitwise() 2942 self._match_texts(("TO", "AND")) 2943 end = self._parse_bitwise() 2944 expression: t.Optional[exp.Expression] = self.expression( 2945 exp.Tuple, expressions=[start, end] 2946 ) 2947 elif self._match_text_seq("CONTAINED", "IN"): 2948 kind = "CONTAINED IN" 2949 expression = self.expression( 2950 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2951 ) 2952 elif self._match(TokenType.ALL): 2953 kind = "ALL" 2954 expression = None 2955 else: 2956 self._match_text_seq("AS", "OF") 2957 kind = "AS OF" 2958 expression = self._parse_type() 2959 2960 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2961 2962 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2963 if not self._match(TokenType.UNNEST): 2964 return None 2965 2966 expressions = self._parse_wrapped_csv(self._parse_equality) 2967 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2968 2969 alias = self._parse_table_alias() if with_alias else None 2970 2971 if alias: 2972 if self.dialect.UNNEST_COLUMN_ONLY: 2973 if alias.args.get("columns"): 2974 self.raise_error("Unexpected extra column alias in unnest.") 2975 
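# _parse_table_parts walks dot-separated names, shifting earlier parts into
# db and catalog as more dots appear; _parse_table then layers on alias,
# hints, pivots and TABLESAMPLE. For example:
from sqlglot import exp, parse_one

table = parse_one("SELECT * FROM c.d.tbl AS a").find(exp.Table)
assert (table.catalog, table.db, table.name, table.alias) == ("c", "d", "tbl", "a")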
2976 alias.set("columns", [alias.this])
2977 alias.set("this", None)
2978
2979 columns = alias.args.get("columns") or []
2980 if offset and len(expressions) < len(columns):
2981 offset = columns.pop()
2982
2983 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
2984 self._match(TokenType.ALIAS)
2985 offset = self._parse_id_var(
2986 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
2987 ) or exp.to_identifier("offset")
2988
2989 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
2990
2991 def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
2992 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2993 if not is_derived and not self._match_text_seq("VALUES"):
2994 return None
2995
2996 expressions = self._parse_csv(self._parse_value)
2997 alias = self._parse_table_alias()
2998
2999 if is_derived:
3000 self._match_r_paren()
3001
3002 return self.expression(
3003 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
3004 )
3005
3006 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
3007 if not self._match(TokenType.TABLE_SAMPLE) and not (
3008 as_modifier and self._match_text_seq("USING", "SAMPLE")
3009 ):
3010 return None
3011
3012 bucket_numerator = None
3013 bucket_denominator = None
3014 bucket_field = None
3015 percent = None
3016 size = None
3017 seed = None
3018
3019 method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
3020 matched_l_paren = self._match(TokenType.L_PAREN)
3021
3022 if self.TABLESAMPLE_CSV:
3023 num = None
3024 expressions = self._parse_csv(self._parse_primary)
3025 else:
3026 expressions = None
3027 num = (
3028 self._parse_factor()
3029 if self._match(TokenType.NUMBER, advance=False)
3030 else self._parse_primary() or self._parse_placeholder()
3031 )
3032
3033 if self._match_text_seq("BUCKET"):
3034 bucket_numerator = self._parse_number()
3035 self._match_text_seq("OUT", "OF")
3036 bucket_denominator = self._parse_number()
3037 self._match(TokenType.ON)
3038 bucket_field = self._parse_field()
3039 elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
3040 percent = num
3041 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
3042 size = num
3043 else:
3044 percent = num
3045
3046 if matched_l_paren:
3047 self._match_r_paren()
3048
3049 if self._match(TokenType.L_PAREN):
3050 method = self._parse_var(upper=True)
3051 seed = self._match(TokenType.COMMA) and self._parse_number()
3052 self._match_r_paren()
3053 elif self._match_texts(("SEED", "REPEATABLE")):
3054 seed = self._parse_wrapped(self._parse_number)
3055
3056 return self.expression(
3057 exp.TableSample,
3058 expressions=expressions,
3059 method=method,
3060 bucket_numerator=bucket_numerator,
3061 bucket_denominator=bucket_denominator,
3062 bucket_field=bucket_field,
3063 percent=percent,
3064 size=size,
3065 seed=seed,
3066 )
3067
3068 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
3069 return list(iter(self._parse_pivot, None)) or None
3070
3071 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
3072 return list(iter(self._parse_join, None)) or None
3073
3074 # https://duckdb.org/docs/sql/statements/pivot
3075 def _parse_simplified_pivot(self) -> exp.Pivot:
3076 def _parse_on() -> t.Optional[exp.Expression]:
3077 this = self._parse_bitwise()
3078 return self._parse_in(this) if self._match(TokenType.IN) else this
3079
3080 this = self._parse_table()
3081 expressions =
self._match(TokenType.ON) and self._parse_csv(_parse_on) 3082 using = self._match(TokenType.USING) and self._parse_csv( 3083 lambda: self._parse_alias(self._parse_function()) 3084 ) 3085 group = self._parse_group() 3086 return self.expression( 3087 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3088 ) 3089 3090 def _parse_pivot_in(self) -> exp.In: 3091 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3092 this = self._parse_conjunction() 3093 3094 self._match(TokenType.ALIAS) 3095 alias = self._parse_field() 3096 if alias: 3097 return self.expression(exp.PivotAlias, this=this, alias=alias) 3098 3099 return this 3100 3101 value = self._parse_column() 3102 3103 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3104 self.raise_error("Expecting IN (") 3105 3106 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3107 3108 self._match_r_paren() 3109 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3110 3111 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3112 index = self._index 3113 include_nulls = None 3114 3115 if self._match(TokenType.PIVOT): 3116 unpivot = False 3117 elif self._match(TokenType.UNPIVOT): 3118 unpivot = True 3119 3120 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3121 if self._match_text_seq("INCLUDE", "NULLS"): 3122 include_nulls = True 3123 elif self._match_text_seq("EXCLUDE", "NULLS"): 3124 include_nulls = False 3125 else: 3126 return None 3127 3128 expressions = [] 3129 3130 if not self._match(TokenType.L_PAREN): 3131 self._retreat(index) 3132 return None 3133 3134 if unpivot: 3135 expressions = self._parse_csv(self._parse_column) 3136 else: 3137 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3138 3139 if not expressions: 3140 self.raise_error("Failed to parse PIVOT's aggregation list") 3141 3142 if not self._match(TokenType.FOR): 3143 self.raise_error("Expecting FOR") 3144 3145 field = self._parse_pivot_in() 3146 3147 self._match_r_paren() 3148 3149 pivot = self.expression( 3150 exp.Pivot, 3151 expressions=expressions, 3152 field=field, 3153 unpivot=unpivot, 3154 include_nulls=include_nulls, 3155 ) 3156 3157 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3158 pivot.set("alias", self._parse_table_alias()) 3159 3160 if not unpivot: 3161 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3162 3163 columns: t.List[exp.Expression] = [] 3164 for fld in pivot.args["field"].expressions: 3165 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3166 for name in names: 3167 if self.PREFIXED_PIVOT_COLUMNS: 3168 name = f"{name}_{field_name}" if name else field_name 3169 else: 3170 name = f"{field_name}_{name}" if name else field_name 3171 3172 columns.append(exp.to_identifier(name)) 3173 3174 pivot.set("columns", columns) 3175 3176 return pivot 3177 3178 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3179 return [agg.alias for agg in aggregations] 3180 3181 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3182 if not skip_where_token and not self._match(TokenType.WHERE): 3183 return None 3184 3185 return self.expression( 3186 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3187 ) 3188 3189 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3190 if not skip_group_by_token and not 
self._match(TokenType.GROUP_BY): 3191 return None 3192 3193 elements = defaultdict(list) 3194 3195 if self._match(TokenType.ALL): 3196 return self.expression(exp.Group, all=True) 3197 3198 while True: 3199 expressions = self._parse_csv(self._parse_conjunction) 3200 if expressions: 3201 elements["expressions"].extend(expressions) 3202 3203 grouping_sets = self._parse_grouping_sets() 3204 if grouping_sets: 3205 elements["grouping_sets"].extend(grouping_sets) 3206 3207 rollup = None 3208 cube = None 3209 totals = None 3210 3211 index = self._index 3212 with_ = self._match(TokenType.WITH) 3213 if self._match(TokenType.ROLLUP): 3214 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3215 elements["rollup"].extend(ensure_list(rollup)) 3216 3217 if self._match(TokenType.CUBE): 3218 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3219 elements["cube"].extend(ensure_list(cube)) 3220 3221 if self._match_text_seq("TOTALS"): 3222 totals = True 3223 elements["totals"] = True # type: ignore 3224 3225 if not (grouping_sets or rollup or cube or totals): 3226 if with_: 3227 self._retreat(index) 3228 break 3229 3230 return self.expression(exp.Group, **elements) # type: ignore 3231 3232 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3233 if not self._match(TokenType.GROUPING_SETS): 3234 return None 3235 3236 return self._parse_wrapped_csv(self._parse_grouping_set) 3237 3238 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3239 if self._match(TokenType.L_PAREN): 3240 grouping_set = self._parse_csv(self._parse_column) 3241 self._match_r_paren() 3242 return self.expression(exp.Tuple, expressions=grouping_set) 3243 3244 return self._parse_column() 3245 3246 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3247 if not skip_having_token and not self._match(TokenType.HAVING): 3248 return None 3249 return self.expression(exp.Having, this=self._parse_conjunction()) 3250 3251 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3252 if not self._match(TokenType.QUALIFY): 3253 return None 3254 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3255 3256 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3257 if skip_start_token: 3258 start = None 3259 elif self._match(TokenType.START_WITH): 3260 start = self._parse_conjunction() 3261 else: 3262 return None 3263 3264 self._match(TokenType.CONNECT_BY) 3265 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3266 exp.Prior, this=self._parse_bitwise() 3267 ) 3268 connect = self._parse_conjunction() 3269 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3270 3271 if not start and self._match(TokenType.START_WITH): 3272 start = self._parse_conjunction() 3273 3274 return self.expression(exp.Connect, start=start, connect=connect) 3275 3276 def _parse_name_as_expression(self) -> exp.Alias: 3277 return self.expression( 3278 exp.Alias, 3279 alias=self._parse_id_var(any_token=True), 3280 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3281 ) 3282 3283 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3284 if self._match_text_seq("INTERPOLATE"): 3285 return self._parse_wrapped_csv(self._parse_name_as_expression) 3286 return None 3287 3288 def _parse_order( 3289 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3290 ) -> t.Optional[exp.Expression]: 3291 siblings = None 3292 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3293 if not 
self._match(TokenType.ORDER_SIBLINGS_BY): 3294 return this 3295 3296 siblings = True 3297 3298 return self.expression( 3299 exp.Order, 3300 this=this, 3301 expressions=self._parse_csv(self._parse_ordered), 3302 interpolate=self._parse_interpolate(), 3303 siblings=siblings, 3304 ) 3305 3306 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3307 if not self._match(token): 3308 return None 3309 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3310 3311 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3312 this = parse_method() if parse_method else self._parse_conjunction() 3313 3314 asc = self._match(TokenType.ASC) 3315 desc = self._match(TokenType.DESC) or (asc and False) 3316 3317 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3318 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3319 3320 nulls_first = is_nulls_first or False 3321 explicitly_null_ordered = is_nulls_first or is_nulls_last 3322 3323 if ( 3324 not explicitly_null_ordered 3325 and ( 3326 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3327 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3328 ) 3329 and self.dialect.NULL_ORDERING != "nulls_are_last" 3330 ): 3331 nulls_first = True 3332 3333 if self._match_text_seq("WITH", "FILL"): 3334 with_fill = self.expression( 3335 exp.WithFill, 3336 **{ # type: ignore 3337 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3338 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3339 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3340 }, 3341 ) 3342 else: 3343 with_fill = None 3344 3345 return self.expression( 3346 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3347 ) 3348 3349 def _parse_limit( 3350 self, this: t.Optional[exp.Expression] = None, top: bool = False 3351 ) -> t.Optional[exp.Expression]: 3352 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3353 comments = self._prev_comments 3354 if top: 3355 limit_paren = self._match(TokenType.L_PAREN) 3356 expression = self._parse_term() if limit_paren else self._parse_number() 3357 3358 if limit_paren: 3359 self._match_r_paren() 3360 else: 3361 expression = self._parse_term() 3362 3363 if self._match(TokenType.COMMA): 3364 offset = expression 3365 expression = self._parse_term() 3366 else: 3367 offset = None 3368 3369 limit_exp = self.expression( 3370 exp.Limit, 3371 this=this, 3372 expression=expression, 3373 offset=offset, 3374 comments=comments, 3375 expressions=self._parse_limit_by(), 3376 ) 3377 3378 return limit_exp 3379 3380 if self._match(TokenType.FETCH): 3381 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3382 direction = self._prev.text.upper() if direction else "FIRST" 3383 3384 count = self._parse_field(tokens=self.FETCH_TOKENS) 3385 percent = self._match(TokenType.PERCENT) 3386 3387 self._match_set((TokenType.ROW, TokenType.ROWS)) 3388 3389 only = self._match_text_seq("ONLY") 3390 with_ties = self._match_text_seq("WITH", "TIES") 3391 3392 if only and with_ties: 3393 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3394 3395 return self.expression( 3396 exp.Fetch, 3397 direction=direction, 3398 count=count, 3399 percent=percent, 3400 with_ties=with_ties, 3401 ) 3402 3403 return this 3404 3405 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3406 if not self._match(TokenType.OFFSET): 3407 return this 3408 3409 count = 
self._parse_term() 3410 self._match_set((TokenType.ROW, TokenType.ROWS)) 3411 3412 return self.expression( 3413 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3414 ) 3415 3416 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3417 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3418 3419 def _parse_locks(self) -> t.List[exp.Lock]: 3420 locks = [] 3421 while True: 3422 if self._match_text_seq("FOR", "UPDATE"): 3423 update = True 3424 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3425 "LOCK", "IN", "SHARE", "MODE" 3426 ): 3427 update = False 3428 else: 3429 break 3430 3431 expressions = None 3432 if self._match_text_seq("OF"): 3433 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3434 3435 wait: t.Optional[bool | exp.Expression] = None 3436 if self._match_text_seq("NOWAIT"): 3437 wait = True 3438 elif self._match_text_seq("WAIT"): 3439 wait = self._parse_primary() 3440 elif self._match_text_seq("SKIP", "LOCKED"): 3441 wait = False 3442 3443 locks.append( 3444 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3445 ) 3446 3447 return locks 3448 3449 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3450 while this and self._match_set(self.SET_OPERATIONS): 3451 token_type = self._prev.token_type 3452 3453 if token_type == TokenType.UNION: 3454 operation = exp.Union 3455 elif token_type == TokenType.EXCEPT: 3456 operation = exp.Except 3457 else: 3458 operation = exp.Intersect 3459 3460 comments = self._prev.comments 3461 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3462 by_name = self._match_text_seq("BY", "NAME") 3463 expression = self._parse_select(nested=True, parse_set_operation=False) 3464 3465 this = self.expression( 3466 operation, 3467 comments=comments, 3468 this=this, 3469 distinct=distinct, 3470 by_name=by_name, 3471 expression=expression, 3472 ) 3473 3474 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3475 expression = this.expression 3476 3477 if expression: 3478 for arg in self.UNION_MODIFIERS: 3479 expr = expression.args.get(arg) 3480 if expr: 3481 this.set(arg, expr.pop()) 3482 3483 return this 3484 3485 def _parse_expression(self) -> t.Optional[exp.Expression]: 3486 return self._parse_alias(self._parse_conjunction()) 3487 3488 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3489 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3490 3491 def _parse_equality(self) -> t.Optional[exp.Expression]: 3492 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3493 3494 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3495 return self._parse_tokens(self._parse_range, self.COMPARISON) 3496 3497 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3498 this = this or self._parse_bitwise() 3499 negate = self._match(TokenType.NOT) 3500 3501 if self._match_set(self.RANGE_PARSERS): 3502 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3503 if not expression: 3504 return this 3505 3506 this = expression 3507 elif self._match(TokenType.ISNULL): 3508 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3509 3510 # Postgres supports ISNULL and NOTNULL for conditions. 
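# For example, "x ISNULL" parses to Is(this=x, expression=Null()) and
# "x NOTNULL" to Not(Is(...)), matching IS NULL / IS NOT NULL semantics.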
3511 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3512 if self._match(TokenType.NOTNULL): 3513 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3514 this = self.expression(exp.Not, this=this) 3515 3516 if negate: 3517 this = self.expression(exp.Not, this=this) 3518 3519 if self._match(TokenType.IS): 3520 this = self._parse_is(this) 3521 3522 return this 3523 3524 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3525 index = self._index - 1 3526 negate = self._match(TokenType.NOT) 3527 3528 if self._match_text_seq("DISTINCT", "FROM"): 3529 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3530 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3531 3532 expression = self._parse_null() or self._parse_boolean() 3533 if not expression: 3534 self._retreat(index) 3535 return None 3536 3537 this = self.expression(exp.Is, this=this, expression=expression) 3538 return self.expression(exp.Not, this=this) if negate else this 3539 3540 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3541 unnest = self._parse_unnest(with_alias=False) 3542 if unnest: 3543 this = self.expression(exp.In, this=this, unnest=unnest) 3544 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3545 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3546 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3547 3548 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3549 this = self.expression(exp.In, this=this, query=expressions[0]) 3550 else: 3551 this = self.expression(exp.In, this=this, expressions=expressions) 3552 3553 if matched_l_paren: 3554 self._match_r_paren(this) 3555 elif not self._match(TokenType.R_BRACKET, expression=this): 3556 self.raise_error("Expecting ]") 3557 else: 3558 this = self.expression(exp.In, this=this, field=self._parse_field()) 3559 3560 return this 3561 3562 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3563 low = self._parse_bitwise() 3564 self._match(TokenType.AND) 3565 high = self._parse_bitwise() 3566 return self.expression(exp.Between, this=this, low=low, high=high) 3567 3568 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3569 if not self._match(TokenType.ESCAPE): 3570 return this 3571 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3572 3573 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3574 index = self._index 3575 3576 if not self._match(TokenType.INTERVAL) and match_interval: 3577 return None 3578 3579 if self._match(TokenType.STRING, advance=False): 3580 this = self._parse_primary() 3581 else: 3582 this = self._parse_term() 3583 3584 if not this or ( 3585 isinstance(this, exp.Column) 3586 and not this.table 3587 and not this.this.quoted 3588 and this.name.upper() == "IS" 3589 ): 3590 self._retreat(index) 3591 return None 3592 3593 unit = self._parse_function() or ( 3594 not self._match(TokenType.ALIAS, advance=False) 3595 and self._parse_var(any_token=True, upper=True) 3596 ) 3597 3598 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3599 # each INTERVAL expression into this canonical form so it's easy to transpile 3600 if this and this.is_number: 3601 this = exp.Literal.string(this.name) 3602 elif this and this.is_string: 3603 parts = this.name.split() 3604 3605 if len(parts) == 2: 3606 if unit: 3607 # This 
is not actually a unit, it's something else (e.g. a "window side") 3608 unit = None 3609 self._retreat(self._index - 1) 3610 3611 this = exp.Literal.string(parts[0]) 3612 unit = self.expression(exp.Var, this=parts[1].upper()) 3613 3614 return self.expression(exp.Interval, this=this, unit=unit) 3615 3616 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3617 this = self._parse_term() 3618 3619 while True: 3620 if self._match_set(self.BITWISE): 3621 this = self.expression( 3622 self.BITWISE[self._prev.token_type], 3623 this=this, 3624 expression=self._parse_term(), 3625 ) 3626 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3627 this = self.expression( 3628 exp.DPipe, 3629 this=this, 3630 expression=self._parse_term(), 3631 safe=not self.dialect.STRICT_STRING_CONCAT, 3632 ) 3633 elif self._match(TokenType.DQMARK): 3634 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3635 elif self._match_pair(TokenType.LT, TokenType.LT): 3636 this = self.expression( 3637 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3638 ) 3639 elif self._match_pair(TokenType.GT, TokenType.GT): 3640 this = self.expression( 3641 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3642 ) 3643 else: 3644 break 3645 3646 return this 3647 3648 def _parse_term(self) -> t.Optional[exp.Expression]: 3649 return self._parse_tokens(self._parse_factor, self.TERM) 3650 3651 def _parse_factor(self) -> t.Optional[exp.Expression]: 3652 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3653 this = parse_method() 3654 3655 while self._match_set(self.FACTOR): 3656 this = self.expression( 3657 self.FACTOR[self._prev.token_type], 3658 this=this, 3659 comments=self._prev_comments, 3660 expression=parse_method(), 3661 ) 3662 if isinstance(this, exp.Div): 3663 this.args["typed"] = self.dialect.TYPED_DIVISION 3664 this.args["safe"] = self.dialect.SAFE_DIVISION 3665 3666 return this 3667 3668 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3669 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3670 3671 def _parse_unary(self) -> t.Optional[exp.Expression]: 3672 if self._match_set(self.UNARY_PARSERS): 3673 return self.UNARY_PARSERS[self._prev.token_type](self) 3674 return self._parse_at_time_zone(self._parse_type()) 3675 3676 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3677 interval = parse_interval and self._parse_interval() 3678 if interval: 3679 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 3680 while True: 3681 index = self._index 3682 self._match(TokenType.PLUS) 3683 3684 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3685 self._retreat(index) 3686 break 3687 3688 interval = self.expression( # type: ignore 3689 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3690 ) 3691 3692 return interval 3693 3694 index = self._index 3695 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3696 this = self._parse_column() 3697 3698 if data_type: 3699 if isinstance(this, exp.Literal): 3700 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3701 if parser: 3702 return parser(self, this, data_type) 3703 return self.expression(exp.Cast, this=this, to=data_type) 3704 if not data_type.expressions: 3705 self._retreat(index) 3706 return self._parse_column() 3707 return self._parse_column_ops(data_type) 3708 3709 return this and self._parse_column_ops(this) 3710 3711 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3712 this = self._parse_type() 3713 if not this: 3714 return None 3715 3716 return self.expression( 3717 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3718 ) 3719 3720 def _parse_types( 3721 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3722 ) -> t.Optional[exp.Expression]: 3723 index = self._index 3724 3725 prefix = self._match_text_seq("SYSUDTLIB", ".") 3726 3727 if not self._match_set(self.TYPE_TOKENS): 3728 identifier = allow_identifiers and self._parse_id_var( 3729 any_token=False, tokens=(TokenType.VAR,) 3730 ) 3731 if identifier: 3732 tokens = self.dialect.tokenize(identifier.name) 3733 3734 if len(tokens) != 1: 3735 self.raise_error("Unexpected identifier", self._prev) 3736 3737 if tokens[0].token_type in self.TYPE_TOKENS: 3738 self._prev = tokens[0] 3739 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3740 type_name = identifier.name 3741 3742 while self._match(TokenType.DOT): 3743 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3744 3745 return exp.DataType.build(type_name, udt=True) 3746 else: 3747 self._retreat(self._index - 1) 3748 return None 3749 else: 3750 return None 3751 3752 type_token = self._prev.token_type 3753 3754 if type_token == TokenType.PSEUDO_TYPE: 3755 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3756 3757 if type_token == TokenType.OBJECT_IDENTIFIER: 3758 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3759 3760 nested = type_token in self.NESTED_TYPE_TOKENS 3761 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3762 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3763 expressions = None 3764 maybe_func = False 3765 3766 if self._match(TokenType.L_PAREN): 3767 if is_struct: 3768 expressions = self._parse_csv(self._parse_struct_types) 3769 elif nested: 3770 expressions = self._parse_csv( 3771 lambda: self._parse_types( 3772 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3773 ) 3774 ) 3775 elif type_token in self.ENUM_TYPE_TOKENS: 3776 expressions = self._parse_csv(self._parse_equality) 3777 elif is_aggregate: 3778 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3779 any_token=False, tokens=(TokenType.VAR,) 3780 ) 3781 if not func_or_ident or not self._match(TokenType.COMMA): 3782 return None 3783 expressions = self._parse_csv( 3784 lambda: self._parse_types( 3785 check_func=check_func, schema=schema, 
allow_identifiers=allow_identifiers 3786 ) 3787 ) 3788 expressions.insert(0, func_or_ident) 3789 else: 3790 expressions = self._parse_csv(self._parse_type_size) 3791 3792 if not expressions or not self._match(TokenType.R_PAREN): 3793 self._retreat(index) 3794 return None 3795 3796 maybe_func = True 3797 3798 this: t.Optional[exp.Expression] = None 3799 values: t.Optional[t.List[exp.Expression]] = None 3800 3801 if nested and self._match(TokenType.LT): 3802 if is_struct: 3803 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3804 else: 3805 expressions = self._parse_csv( 3806 lambda: self._parse_types( 3807 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3808 ) 3809 ) 3810 3811 if not self._match(TokenType.GT): 3812 self.raise_error("Expecting >") 3813 3814 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3815 values = self._parse_csv(self._parse_conjunction) 3816 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3817 3818 if type_token in self.TIMESTAMPS: 3819 if self._match_text_seq("WITH", "TIME", "ZONE"): 3820 maybe_func = False 3821 tz_type = ( 3822 exp.DataType.Type.TIMETZ 3823 if type_token in self.TIMES 3824 else exp.DataType.Type.TIMESTAMPTZ 3825 ) 3826 this = exp.DataType(this=tz_type, expressions=expressions) 3827 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3828 maybe_func = False 3829 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3830 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3831 maybe_func = False 3832 elif type_token == TokenType.INTERVAL: 3833 unit = self._parse_var() 3834 3835 if self._match_text_seq("TO"): 3836 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3837 else: 3838 span = None 3839 3840 if span or not unit: 3841 this = self.expression( 3842 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3843 ) 3844 else: 3845 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3846 3847 if maybe_func and check_func: 3848 index2 = self._index 3849 peek = self._parse_string() 3850 3851 if not peek: 3852 self._retreat(index) 3853 return None 3854 3855 self._retreat(index2) 3856 3857 if not this: 3858 if self._match_text_seq("UNSIGNED"): 3859 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3860 if not unsigned_type_token: 3861 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3862 3863 type_token = unsigned_type_token or type_token 3864 3865 this = exp.DataType( 3866 this=exp.DataType.Type[type_token.value], 3867 expressions=expressions, 3868 nested=nested, 3869 values=values, 3870 prefix=prefix, 3871 ) 3872 3873 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3874 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3875 3876 return this 3877 3878 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3879 index = self._index 3880 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3881 self._match(TokenType.COLON) 3882 column_def = self._parse_column_def(this) 3883 3884 if type_required and ( 3885 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3886 ): 3887 self._retreat(index) 3888 return self._parse_types() 3889 3890 return column_def 3891 3892 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3893 if not self._match_text_seq("AT", "TIME", "ZONE"): 3894 
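# No AT TIME ZONE suffix follows; leave the expression unchanged.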
return this 3895 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3896 3897 def _parse_column(self) -> t.Optional[exp.Expression]: 3898 this = self._parse_column_reference() 3899 return self._parse_column_ops(this) if this else self._parse_bracket(this) 3900 3901 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 3902 this = self._parse_field() 3903 if ( 3904 not this 3905 and self._match(TokenType.VALUES, advance=False) 3906 and self.VALUES_FOLLOWED_BY_PAREN 3907 and (not self._next or self._next.token_type != TokenType.L_PAREN) 3908 ): 3909 this = self._parse_id_var() 3910 3911 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 3912 3913 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3914 this = self._parse_bracket(this) 3915 3916 while self._match_set(self.COLUMN_OPERATORS): 3917 op_token = self._prev.token_type 3918 op = self.COLUMN_OPERATORS.get(op_token) 3919 3920 if op_token == TokenType.DCOLON: 3921 field = self._parse_types() 3922 if not field: 3923 self.raise_error("Expected type") 3924 elif op and self._curr: 3925 field = self._parse_column_reference() 3926 else: 3927 field = self._parse_field(anonymous_func=True, any_token=True) 3928 3929 if isinstance(field, exp.Func): 3930 # bigquery allows function calls like x.y.count(...) 3931 # SAFE.SUBSTR(...) 3932 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3933 this = self._replace_columns_with_dots(this) 3934 3935 if op: 3936 this = op(self, this, field) 3937 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3938 this = self.expression( 3939 exp.Column, 3940 this=field, 3941 table=this.this, 3942 db=this.args.get("table"), 3943 catalog=this.args.get("db"), 3944 ) 3945 else: 3946 this = self.expression(exp.Dot, this=this, expression=field) 3947 this = self._parse_bracket(this) 3948 return this 3949 3950 def _parse_primary(self) -> t.Optional[exp.Expression]: 3951 if self._match_set(self.PRIMARY_PARSERS): 3952 token_type = self._prev.token_type 3953 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3954 3955 if token_type == TokenType.STRING: 3956 expressions = [primary] 3957 while self._match(TokenType.STRING): 3958 expressions.append(exp.Literal.string(self._prev.text)) 3959 3960 if len(expressions) > 1: 3961 return self.expression(exp.Concat, expressions=expressions) 3962 3963 return primary 3964 3965 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3966 return exp.Literal.number(f"0.{self._prev.text}") 3967 3968 if self._match(TokenType.L_PAREN): 3969 comments = self._prev_comments 3970 query = self._parse_select() 3971 3972 if query: 3973 expressions = [query] 3974 else: 3975 expressions = self._parse_expressions() 3976 3977 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3978 3979 if isinstance(this, exp.Subqueryable): 3980 this = self._parse_set_operations( 3981 self._parse_subquery(this=this, parse_alias=False) 3982 ) 3983 elif len(expressions) > 1: 3984 this = self.expression(exp.Tuple, expressions=expressions) 3985 else: 3986 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3987 3988 if this: 3989 this.add_comments(comments) 3990 3991 self._match_r_paren(expression=this) 3992 return this 3993 3994 return None 3995 3996 def _parse_field( 3997 self, 3998 any_token: bool = False, 3999 tokens: t.Optional[t.Collection[TokenType]] = None, 4000 anonymous_func: bool = False, 4001 ) 
-> t.Optional[exp.Expression]: 4002 return ( 4003 self._parse_primary() 4004 or self._parse_function(anonymous=anonymous_func) 4005 or self._parse_id_var(any_token=any_token, tokens=tokens) 4006 ) 4007 4008 def _parse_function( 4009 self, 4010 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4011 anonymous: bool = False, 4012 optional_parens: bool = True, 4013 ) -> t.Optional[exp.Expression]: 4014 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4015 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4016 fn_syntax = False 4017 if ( 4018 self._match(TokenType.L_BRACE, advance=False) 4019 and self._next 4020 and self._next.text.upper() == "FN" 4021 ): 4022 self._advance(2) 4023 fn_syntax = True 4024 4025 func = self._parse_function_call( 4026 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4027 ) 4028 4029 if fn_syntax: 4030 self._match(TokenType.R_BRACE) 4031 4032 return func 4033 4034 def _parse_function_call( 4035 self, 4036 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4037 anonymous: bool = False, 4038 optional_parens: bool = True, 4039 ) -> t.Optional[exp.Expression]: 4040 if not self._curr: 4041 return None 4042 4043 comments = self._curr.comments 4044 token_type = self._curr.token_type 4045 this = self._curr.text 4046 upper = this.upper() 4047 4048 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4049 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4050 self._advance() 4051 return parser(self) 4052 4053 if not self._next or self._next.token_type != TokenType.L_PAREN: 4054 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4055 self._advance() 4056 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4057 4058 return None 4059 4060 if token_type not in self.FUNC_TOKENS: 4061 return None 4062 4063 self._advance(2) 4064 4065 parser = self.FUNCTION_PARSERS.get(upper) 4066 if parser and not anonymous: 4067 this = parser(self) 4068 else: 4069 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4070 4071 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4072 this = self.expression(subquery_predicate, this=self._parse_select()) 4073 self._match_r_paren() 4074 return this 4075 4076 if functions is None: 4077 functions = self.FUNCTIONS 4078 4079 function = functions.get(upper) 4080 4081 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4082 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4083 4084 if function and not anonymous: 4085 if "dialect" in function.__code__.co_varnames: 4086 func = function(args, dialect=self.dialect) 4087 else: 4088 func = function(args) 4089 4090 func = self.validate_expression(func, args) 4091 if not self.dialect.NORMALIZE_FUNCTIONS: 4092 func.meta["name"] = this 4093 4094 this = func 4095 else: 4096 this = self.expression(exp.Anonymous, this=this, expressions=args) 4097 4098 if isinstance(this, exp.Expression): 4099 this.add_comments(comments) 4100 4101 self._match_r_paren(this) 4102 return self._parse_window(this) 4103 4104 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4105 return self._parse_column_def(self._parse_id_var()) 4106 4107 def _parse_user_defined_function( 4108 self, kind: t.Optional[TokenType] = None 4109 ) -> t.Optional[exp.Expression]: 4110 this = self._parse_id_var() 4111 4112 while self._match(TokenType.DOT): 4113 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4114 4115 if not 
self._match(TokenType.L_PAREN): 4116 return this 4117 4118 expressions = self._parse_csv(self._parse_function_parameter) 4119 self._match_r_paren() 4120 return self.expression( 4121 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4122 ) 4123 4124 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4125 literal = self._parse_primary() 4126 if literal: 4127 return self.expression(exp.Introducer, this=token.text, expression=literal) 4128 4129 return self.expression(exp.Identifier, this=token.text) 4130 4131 def _parse_session_parameter(self) -> exp.SessionParameter: 4132 kind = None 4133 this = self._parse_id_var() or self._parse_primary() 4134 4135 if this and self._match(TokenType.DOT): 4136 kind = this.name 4137 this = self._parse_var() or self._parse_primary() 4138 4139 return self.expression(exp.SessionParameter, this=this, kind=kind) 4140 4141 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4142 index = self._index 4143 4144 if self._match(TokenType.L_PAREN): 4145 expressions = t.cast( 4146 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4147 ) 4148 4149 if not self._match(TokenType.R_PAREN): 4150 self._retreat(index) 4151 else: 4152 expressions = [self._parse_id_var()] 4153 4154 if self._match_set(self.LAMBDAS): 4155 return self.LAMBDAS[self._prev.token_type](self, expressions) 4156 4157 self._retreat(index) 4158 4159 this: t.Optional[exp.Expression] 4160 4161 if self._match(TokenType.DISTINCT): 4162 this = self.expression( 4163 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4164 ) 4165 else: 4166 this = self._parse_select_or_expression(alias=alias) 4167 4168 return self._parse_limit( 4169 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4170 ) 4171 4172 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4173 index = self._index 4174 4175 if not self.errors: 4176 try: 4177 if self._parse_select(nested=True): 4178 return this 4179 except ParseError: 4180 pass 4181 finally: 4182 self.errors.clear() 4183 self._retreat(index) 4184 4185 if not self._match(TokenType.L_PAREN): 4186 return this 4187 4188 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4189 4190 self._match_r_paren() 4191 return self.expression(exp.Schema, this=this, expressions=args) 4192 4193 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4194 return self._parse_column_def(self._parse_field(any_token=True)) 4195 4196 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4197 # column defs are not really columns, they're identifiers 4198 if isinstance(this, exp.Column): 4199 this = this.this 4200 4201 kind = self._parse_types(schema=True) 4202 4203 if self._match_text_seq("FOR", "ORDINALITY"): 4204 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4205 4206 constraints: t.List[exp.Expression] = [] 4207 4208 if not kind and self._match(TokenType.ALIAS): 4209 constraints.append( 4210 self.expression( 4211 exp.ComputedColumnConstraint, 4212 this=self._parse_conjunction(), 4213 persisted=self._match_text_seq("PERSISTED"), 4214 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4215 ) 4216 ) 4217 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4218 self._match(TokenType.ALIAS) 4219 constraints.append( 4220 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4221 ) 4222 
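# Greedily collect any remaining inline constraints (NOT NULL, DEFAULT, PRIMARY KEY, ...);
# _parse_column_constraint returns None once no CONSTRAINT_PARSERS entry matches, ending the loop.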
4223 while True: 4224 constraint = self._parse_column_constraint() 4225 if not constraint: 4226 break 4227 constraints.append(constraint) 4228 4229 if not kind and not constraints: 4230 return this 4231 4232 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4233 4234 def _parse_auto_increment( 4235 self, 4236 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4237 start = None 4238 increment = None 4239 4240 if self._match(TokenType.L_PAREN, advance=False): 4241 args = self._parse_wrapped_csv(self._parse_bitwise) 4242 start = seq_get(args, 0) 4243 increment = seq_get(args, 1) 4244 elif self._match_text_seq("START"): 4245 start = self._parse_bitwise() 4246 self._match_text_seq("INCREMENT") 4247 increment = self._parse_bitwise() 4248 4249 if start and increment: 4250 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4251 4252 return exp.AutoIncrementColumnConstraint() 4253 4254 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4255 if not self._match_text_seq("REFRESH"): 4256 self._retreat(self._index - 1) 4257 return None 4258 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4259 4260 def _parse_compress(self) -> exp.CompressColumnConstraint: 4261 if self._match(TokenType.L_PAREN, advance=False): 4262 return self.expression( 4263 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4264 ) 4265 4266 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4267 4268 def _parse_generated_as_identity( 4269 self, 4270 ) -> ( 4271 exp.GeneratedAsIdentityColumnConstraint 4272 | exp.ComputedColumnConstraint 4273 | exp.GeneratedAsRowColumnConstraint 4274 ): 4275 if self._match_text_seq("BY", "DEFAULT"): 4276 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4277 this = self.expression( 4278 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4279 ) 4280 else: 4281 self._match_text_seq("ALWAYS") 4282 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4283 4284 self._match(TokenType.ALIAS) 4285 4286 if self._match_text_seq("ROW"): 4287 start = self._match_text_seq("START") 4288 if not start: 4289 self._match(TokenType.END) 4290 hidden = self._match_text_seq("HIDDEN") 4291 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4292 4293 identity = self._match_text_seq("IDENTITY") 4294 4295 if self._match(TokenType.L_PAREN): 4296 if self._match(TokenType.START_WITH): 4297 this.set("start", self._parse_bitwise()) 4298 if self._match_text_seq("INCREMENT", "BY"): 4299 this.set("increment", self._parse_bitwise()) 4300 if self._match_text_seq("MINVALUE"): 4301 this.set("minvalue", self._parse_bitwise()) 4302 if self._match_text_seq("MAXVALUE"): 4303 this.set("maxvalue", self._parse_bitwise()) 4304 4305 if self._match_text_seq("CYCLE"): 4306 this.set("cycle", True) 4307 elif self._match_text_seq("NO", "CYCLE"): 4308 this.set("cycle", False) 4309 4310 if not identity: 4311 this.set("expression", self._parse_bitwise()) 4312 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4313 args = self._parse_csv(self._parse_bitwise) 4314 this.set("start", seq_get(args, 0)) 4315 this.set("increment", seq_get(args, 1)) 4316 4317 self._match_r_paren() 4318 4319 return this 4320 4321 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4322 self._match_text_seq("LENGTH") 4323 return 
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4324 4325 def _parse_not_constraint( 4326 self, 4327 ) -> t.Optional[exp.Expression]: 4328 if self._match_text_seq("NULL"): 4329 return self.expression(exp.NotNullColumnConstraint) 4330 if self._match_text_seq("CASESPECIFIC"): 4331 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4332 if self._match_text_seq("FOR", "REPLICATION"): 4333 return self.expression(exp.NotForReplicationColumnConstraint) 4334 return None 4335 4336 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4337 if self._match(TokenType.CONSTRAINT): 4338 this = self._parse_id_var() 4339 else: 4340 this = None 4341 4342 if self._match_texts(self.CONSTRAINT_PARSERS): 4343 return self.expression( 4344 exp.ColumnConstraint, 4345 this=this, 4346 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4347 ) 4348 4349 return this 4350 4351 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4352 if not self._match(TokenType.CONSTRAINT): 4353 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4354 4355 this = self._parse_id_var() 4356 expressions = [] 4357 4358 while True: 4359 constraint = self._parse_unnamed_constraint() or self._parse_function() 4360 if not constraint: 4361 break 4362 expressions.append(constraint) 4363 4364 return self.expression(exp.Constraint, this=this, expressions=expressions) 4365 4366 def _parse_unnamed_constraint( 4367 self, constraints: t.Optional[t.Collection[str]] = None 4368 ) -> t.Optional[exp.Expression]: 4369 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4370 constraints or self.CONSTRAINT_PARSERS 4371 ): 4372 return None 4373 4374 constraint = self._prev.text.upper() 4375 if constraint not in self.CONSTRAINT_PARSERS: 4376 self.raise_error(f"No parser found for schema constraint {constraint}.") 4377 4378 return self.CONSTRAINT_PARSERS[constraint](self) 4379 4380 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4381 self._match_text_seq("KEY") 4382 return self.expression( 4383 exp.UniqueColumnConstraint, 4384 this=self._parse_schema(self._parse_id_var(any_token=False)), 4385 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4386 ) 4387 4388 def _parse_key_constraint_options(self) -> t.List[str]: 4389 options = [] 4390 while True: 4391 if not self._curr: 4392 break 4393 4394 if self._match(TokenType.ON): 4395 action = None 4396 on = self._advance_any() and self._prev.text 4397 4398 if self._match_text_seq("NO", "ACTION"): 4399 action = "NO ACTION" 4400 elif self._match_text_seq("CASCADE"): 4401 action = "CASCADE" 4402 elif self._match_text_seq("RESTRICT"): 4403 action = "RESTRICT" 4404 elif self._match_pair(TokenType.SET, TokenType.NULL): 4405 action = "SET NULL" 4406 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4407 action = "SET DEFAULT" 4408 else: 4409 self.raise_error("Invalid key constraint") 4410 4411 options.append(f"ON {on} {action}") 4412 elif self._match_text_seq("NOT", "ENFORCED"): 4413 options.append("NOT ENFORCED") 4414 elif self._match_text_seq("DEFERRABLE"): 4415 options.append("DEFERRABLE") 4416 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4417 options.append("INITIALLY DEFERRED") 4418 elif self._match_text_seq("NORELY"): 4419 options.append("NORELY") 4420 elif self._match_text_seq("MATCH", "FULL"): 4421 options.append("MATCH FULL") 4422 else: 4423 break 4424 4425 return options 4426 4427 def _parse_references(self, match: bool = True) -> 
t.Optional[exp.Reference]: 4428 if match and not self._match(TokenType.REFERENCES): 4429 return None 4430 4431 expressions = None 4432 this = self._parse_table(schema=True) 4433 options = self._parse_key_constraint_options() 4434 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4435 4436 def _parse_foreign_key(self) -> exp.ForeignKey: 4437 expressions = self._parse_wrapped_id_vars() 4438 reference = self._parse_references() 4439 options = {} 4440 4441 while self._match(TokenType.ON): 4442 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4443 self.raise_error("Expected DELETE or UPDATE") 4444 4445 kind = self._prev.text.lower() 4446 4447 if self._match_text_seq("NO", "ACTION"): 4448 action = "NO ACTION" 4449 elif self._match(TokenType.SET): 4450 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4451 action = "SET " + self._prev.text.upper() 4452 else: 4453 self._advance() 4454 action = self._prev.text.upper() 4455 4456 options[kind] = action 4457 4458 return self.expression( 4459 exp.ForeignKey, 4460 expressions=expressions, 4461 reference=reference, 4462 **options, # type: ignore 4463 ) 4464 4465 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4466 return self._parse_field() 4467 4468 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4469 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4470 self._retreat(self._index - 1) 4471 return None 4472 4473 id_vars = self._parse_wrapped_id_vars() 4474 return self.expression( 4475 exp.PeriodForSystemTimeConstraint, 4476 this=seq_get(id_vars, 0), 4477 expression=seq_get(id_vars, 1), 4478 ) 4479 4480 def _parse_primary_key( 4481 self, wrapped_optional: bool = False, in_props: bool = False 4482 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4483 desc = ( 4484 self._match_set((TokenType.ASC, TokenType.DESC)) 4485 and self._prev.token_type == TokenType.DESC 4486 ) 4487 4488 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4489 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4490 4491 expressions = self._parse_wrapped_csv( 4492 self._parse_primary_key_part, optional=wrapped_optional 4493 ) 4494 options = self._parse_key_constraint_options() 4495 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4496 4497 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4498 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4499 4500 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4501 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4502 return this 4503 4504 bracket_kind = self._prev.token_type 4505 expressions = self._parse_csv( 4506 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4507 ) 4508 4509 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4510 self.raise_error("Expected ]") 4511 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4512 self.raise_error("Expected }") 4513 4514 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4515 if bracket_kind == TokenType.L_BRACE: 4516 this = self.expression(exp.Struct, expressions=expressions) 4517 elif not this or this.name.upper() == "ARRAY": 4518 this = self.expression(exp.Array, expressions=expressions) 4519 else: 4520 expressions = apply_index_offset(this, expressions, 
-self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
        ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
        ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
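The helpers above (_match, _match_text_seq, _parse_csv, _retreat, and friends) are the primitives that dialect-specific parsers compose. As a minimal, hypothetical sketch (the subclass, method name, and GREETING syntax are illustrative, not part of sqlglot):

import typing as t

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    # Hypothetical: parse `GREETING name [, name ...]` into an anonymous function node
    def _parse_greeting(self) -> t.Optional[exp.Expression]:
        # _match_text_seq backtracks on its own if the full keyword sequence is absent
        if not self._match_text_seq("GREETING"):
            return None
        # _parse_csv collects a comma-separated list using the given sub-parser
        names = self._parse_csv(self._parse_id_var)
        # self.expression attaches pending comments and validates mandatory args
        return self.expression(exp.Anonymous, this="GREETING", expressions=names)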
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
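A minimal construction sketch (using the default Tokenizer; dialects normally supply their own):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# Collect up to five errors and raise them together once parsing finishes
parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)
tokens = Tokenizer().tokenize("SELECT a FROM t")
expressions = parser.parse(tokens, sql="SELECT a FROM t")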
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
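A short usage sketch, assuming the default dialect's Tokenizer: two statements in, two trees out.

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
assert len(trees) == 2  # one syntax tree per semicolon-separated statement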
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
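For example, assuming exp.Select is one of the registered EXPRESSION_PARSERS keys (a sketch):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t WHERE b > 0"
# Tries the exp.Select parser; a ParseError is raised if no candidate type succeeds
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]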
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
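A sketch of the RAISE level, where errors accumulate during parsing and check_errors raises them together at the end:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

parser = Parser(error_level=ErrorLevel.RAISE, max_errors=3)
try:
    # Malformed CASE: the collected errors surface as a single ParseError
    parser.parse(Tokenizer().tokenize("SELECT CASE WHEN 1 FROM t"))
except ParseError as e:
    print(e.errors)  # structured error dicts: description, line, col, context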
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
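In an overridden parsing method, passing a token pins the error to a source location. A hypothetical sketch (the subclass and its no-star policy are illustrative, not part of sqlglot):

import typing as t

from sqlglot import exp
from sqlglot.parser import Parser

class NoStarParser(Parser):
    # Hypothetical policy: reject `SELECT *` projections at parse time
    def _parse_star(self) -> t.Optional[exp.Expression]:
        star = super()._parse_star()
        if isinstance(star, exp.Star):
            # With the default ErrorLevel.IMMEDIATE this raises right away
            self.raise_error("Star projections are forbidden", self._prev)
        return star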
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
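A sketch of typical use inside a parsing method (the helper itself is hypothetical):

import typing as t

from sqlglot import exp
from sqlglot.parser import Parser

class ParenParser(Parser):
    # Hypothetical helper: wrap the next parsed column in an exp.Paren node
    def _parse_paren_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column()
        # expression() attaches pending comments and runs validate_expression
        return self.expression(exp.Paren, this=this) if this else None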
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
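For instance, with the default ErrorLevel.IMMEDIATE a missing mandatory argument raises at once (a sketch; exp.If declares its condition and THEN branch as required):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # error_level defaults to ErrorLevel.IMMEDIATE
try:
    parser.validate_expression(exp.If())  # no condition, no THEN branch
except ParseError as e:
    print(e)  # e.g. "Required keyword: 'this' missing for ..."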