# sqlglot.parser — tokens-to-AST parser module of the sqlglot SQL transpiler.
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a ``StarMap``/``VarMap`` from a flat list of alternating keys and values.

    A single star argument produces a ``StarMap``; otherwise even positions are
    treated as keys and odd positions as the matching values.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List[exp.Expression] = []
    values: t.List[exp.Expression] = []

    # Consume the arguments pairwise: key at `index`, value at `index + 1`.
    index = 0
    while index < len(args):
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a ``Like`` expression, wrapped in ``Escape`` when a third argument is given.

    Note the argument order: the second argument is the subject and the first is
    the pattern, matching the function-call convention for LIKE.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))

    if len(args) > 2:
        return exp.Escape(this=like, expression=seq_get(args, 2))

    return like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback producing a binary range expression of *expr_type*.

    The callback parses the right-hand side with ``_parse_bitwise`` and allows a
    trailing ESCAPE clause via ``_parse_escape``.
    """

    def _parse(self: Parser, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        return self._parse_escape(
            self.expression(expr_type, this=this, expression=self._parse_bitwise())
        )

    return _parse


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a ``Log``/``Ln`` expression, respecting the dialect's argument order.

    With two arguments, the default order is ``(base, expression)``; dialects
    where ``LOG_BASE_FIRST`` is false expect the reversed order. With a single
    argument, the result is ``Ln`` when the dialect's parser defaults LOG to the
    natural logarithm, otherwise ``Log``.
    """
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if not expression:
        klass = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return klass(this=this)

    if not dialect.LOG_BASE_FIRST:
        this, expression = expression, this

    return exp.Log(this=this, expression=expression)
62 63 64def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 65 def _builder(args: t.List, dialect: Dialect) -> E: 66 expression = expr_type( 67 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 68 ) 69 if len(args) > 2 and expr_type is exp.JSONExtract: 70 expression.set("expressions", args[2:]) 71 72 return expression 73 74 return _builder 75 76 77class _Parser(type): 78 def __new__(cls, clsname, bases, attrs): 79 klass = super().__new__(cls, clsname, bases, attrs) 80 81 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 82 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 83 84 return klass 85 86 87class Parser(metaclass=_Parser): 88 """ 89 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 90 91 Args: 92 error_level: The desired error level. 93 Default: ErrorLevel.IMMEDIATE 94 error_message_context: The amount of context to capture from a query string when displaying 95 the error message (in number of characters). 96 Default: 100 97 max_errors: Maximum number of error messages to include in a raised ParseError. 98 This is only relevant if error_level is ErrorLevel.RAISE. 
99 Default: 3 100 """ 101 102 FUNCTIONS: t.Dict[str, t.Callable] = { 103 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 104 "CONCAT": lambda args, dialect: exp.Concat( 105 expressions=args, 106 safe=not dialect.STRICT_STRING_CONCAT, 107 coalesce=dialect.CONCAT_COALESCE, 108 ), 109 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 110 expressions=args, 111 safe=not dialect.STRICT_STRING_CONCAT, 112 coalesce=dialect.CONCAT_COALESCE, 113 ), 114 "DATE_TO_DATE_STR": lambda args: exp.Cast( 115 this=seq_get(args, 0), 116 to=exp.DataType(this=exp.DataType.Type.TEXT), 117 ), 118 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 119 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 120 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 121 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 122 "LIKE": build_like, 123 "LOG": build_logarithm, 124 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 125 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 126 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 127 "TIME_TO_TIME_STR": lambda args: exp.Cast( 128 this=seq_get(args, 0), 129 to=exp.DataType(this=exp.DataType.Type.TEXT), 130 ), 131 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 132 this=exp.Cast( 133 this=seq_get(args, 0), 134 to=exp.DataType(this=exp.DataType.Type.TEXT), 135 ), 136 start=exp.Literal.number(1), 137 length=exp.Literal.number(10), 138 ), 139 "VAR_MAP": build_var_map, 140 } 141 142 NO_PAREN_FUNCTIONS = { 143 TokenType.CURRENT_DATE: exp.CurrentDate, 144 TokenType.CURRENT_DATETIME: exp.CurrentDate, 145 TokenType.CURRENT_TIME: exp.CurrentTime, 146 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 147 TokenType.CURRENT_USER: exp.CurrentUser, 148 } 149 150 STRUCT_TYPE_TOKENS = { 151 TokenType.NESTED, 152 TokenType.OBJECT, 153 
TokenType.STRUCT, 154 } 155 156 NESTED_TYPE_TOKENS = { 157 TokenType.ARRAY, 158 TokenType.LOWCARDINALITY, 159 TokenType.MAP, 160 TokenType.NULLABLE, 161 *STRUCT_TYPE_TOKENS, 162 } 163 164 ENUM_TYPE_TOKENS = { 165 TokenType.ENUM, 166 TokenType.ENUM8, 167 TokenType.ENUM16, 168 } 169 170 AGGREGATE_TYPE_TOKENS = { 171 TokenType.AGGREGATEFUNCTION, 172 TokenType.SIMPLEAGGREGATEFUNCTION, 173 } 174 175 TYPE_TOKENS = { 176 TokenType.BIT, 177 TokenType.BOOLEAN, 178 TokenType.TINYINT, 179 TokenType.UTINYINT, 180 TokenType.SMALLINT, 181 TokenType.USMALLINT, 182 TokenType.INT, 183 TokenType.UINT, 184 TokenType.BIGINT, 185 TokenType.UBIGINT, 186 TokenType.INT128, 187 TokenType.UINT128, 188 TokenType.INT256, 189 TokenType.UINT256, 190 TokenType.MEDIUMINT, 191 TokenType.UMEDIUMINT, 192 TokenType.FIXEDSTRING, 193 TokenType.FLOAT, 194 TokenType.DOUBLE, 195 TokenType.CHAR, 196 TokenType.NCHAR, 197 TokenType.VARCHAR, 198 TokenType.NVARCHAR, 199 TokenType.BPCHAR, 200 TokenType.TEXT, 201 TokenType.MEDIUMTEXT, 202 TokenType.LONGTEXT, 203 TokenType.MEDIUMBLOB, 204 TokenType.LONGBLOB, 205 TokenType.BINARY, 206 TokenType.VARBINARY, 207 TokenType.JSON, 208 TokenType.JSONB, 209 TokenType.INTERVAL, 210 TokenType.TINYBLOB, 211 TokenType.TINYTEXT, 212 TokenType.TIME, 213 TokenType.TIMETZ, 214 TokenType.TIMESTAMP, 215 TokenType.TIMESTAMP_S, 216 TokenType.TIMESTAMP_MS, 217 TokenType.TIMESTAMP_NS, 218 TokenType.TIMESTAMPTZ, 219 TokenType.TIMESTAMPLTZ, 220 TokenType.DATETIME, 221 TokenType.DATETIME64, 222 TokenType.DATE, 223 TokenType.DATE32, 224 TokenType.INT4RANGE, 225 TokenType.INT4MULTIRANGE, 226 TokenType.INT8RANGE, 227 TokenType.INT8MULTIRANGE, 228 TokenType.NUMRANGE, 229 TokenType.NUMMULTIRANGE, 230 TokenType.TSRANGE, 231 TokenType.TSMULTIRANGE, 232 TokenType.TSTZRANGE, 233 TokenType.TSTZMULTIRANGE, 234 TokenType.DATERANGE, 235 TokenType.DATEMULTIRANGE, 236 TokenType.DECIMAL, 237 TokenType.UDECIMAL, 238 TokenType.BIGDECIMAL, 239 TokenType.UUID, 240 TokenType.GEOGRAPHY, 241 TokenType.GEOMETRY, 
242 TokenType.HLLSKETCH, 243 TokenType.HSTORE, 244 TokenType.PSEUDO_TYPE, 245 TokenType.SUPER, 246 TokenType.SERIAL, 247 TokenType.SMALLSERIAL, 248 TokenType.BIGSERIAL, 249 TokenType.XML, 250 TokenType.YEAR, 251 TokenType.UNIQUEIDENTIFIER, 252 TokenType.USERDEFINED, 253 TokenType.MONEY, 254 TokenType.SMALLMONEY, 255 TokenType.ROWVERSION, 256 TokenType.IMAGE, 257 TokenType.VARIANT, 258 TokenType.OBJECT, 259 TokenType.OBJECT_IDENTIFIER, 260 TokenType.INET, 261 TokenType.IPADDRESS, 262 TokenType.IPPREFIX, 263 TokenType.IPV4, 264 TokenType.IPV6, 265 TokenType.UNKNOWN, 266 TokenType.NULL, 267 TokenType.NAME, 268 *ENUM_TYPE_TOKENS, 269 *NESTED_TYPE_TOKENS, 270 *AGGREGATE_TYPE_TOKENS, 271 } 272 273 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 274 TokenType.BIGINT: TokenType.UBIGINT, 275 TokenType.INT: TokenType.UINT, 276 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 277 TokenType.SMALLINT: TokenType.USMALLINT, 278 TokenType.TINYINT: TokenType.UTINYINT, 279 TokenType.DECIMAL: TokenType.UDECIMAL, 280 } 281 282 SUBQUERY_PREDICATES = { 283 TokenType.ANY: exp.Any, 284 TokenType.ALL: exp.All, 285 TokenType.EXISTS: exp.Exists, 286 TokenType.SOME: exp.Any, 287 } 288 289 RESERVED_TOKENS = { 290 *Tokenizer.SINGLE_TOKENS.values(), 291 TokenType.SELECT, 292 } 293 294 DB_CREATABLES = { 295 TokenType.DATABASE, 296 TokenType.SCHEMA, 297 TokenType.TABLE, 298 TokenType.VIEW, 299 TokenType.MODEL, 300 TokenType.DICTIONARY, 301 TokenType.SEQUENCE, 302 TokenType.STORAGE_INTEGRATION, 303 } 304 305 CREATABLES = { 306 TokenType.COLUMN, 307 TokenType.CONSTRAINT, 308 TokenType.FUNCTION, 309 TokenType.INDEX, 310 TokenType.PROCEDURE, 311 TokenType.FOREIGN_KEY, 312 *DB_CREATABLES, 313 } 314 315 # Tokens that can represent identifiers 316 ID_VAR_TOKENS = { 317 TokenType.VAR, 318 TokenType.ANTI, 319 TokenType.APPLY, 320 TokenType.ASC, 321 TokenType.ASOF, 322 TokenType.AUTO_INCREMENT, 323 TokenType.BEGIN, 324 TokenType.BPCHAR, 325 TokenType.CACHE, 326 TokenType.CASE, 327 TokenType.COLLATE, 328 TokenType.COMMAND, 329 
TokenType.COMMENT, 330 TokenType.COMMIT, 331 TokenType.CONSTRAINT, 332 TokenType.DEFAULT, 333 TokenType.DELETE, 334 TokenType.DESC, 335 TokenType.DESCRIBE, 336 TokenType.DICTIONARY, 337 TokenType.DIV, 338 TokenType.END, 339 TokenType.EXECUTE, 340 TokenType.ESCAPE, 341 TokenType.FALSE, 342 TokenType.FIRST, 343 TokenType.FILTER, 344 TokenType.FINAL, 345 TokenType.FORMAT, 346 TokenType.FULL, 347 TokenType.IDENTIFIER, 348 TokenType.IS, 349 TokenType.ISNULL, 350 TokenType.INTERVAL, 351 TokenType.KEEP, 352 TokenType.KILL, 353 TokenType.LEFT, 354 TokenType.LOAD, 355 TokenType.MERGE, 356 TokenType.NATURAL, 357 TokenType.NEXT, 358 TokenType.OFFSET, 359 TokenType.OPERATOR, 360 TokenType.ORDINALITY, 361 TokenType.OVERLAPS, 362 TokenType.OVERWRITE, 363 TokenType.PARTITION, 364 TokenType.PERCENT, 365 TokenType.PIVOT, 366 TokenType.PRAGMA, 367 TokenType.RANGE, 368 TokenType.RECURSIVE, 369 TokenType.REFERENCES, 370 TokenType.REFRESH, 371 TokenType.REPLACE, 372 TokenType.RIGHT, 373 TokenType.ROW, 374 TokenType.ROWS, 375 TokenType.SEMI, 376 TokenType.SET, 377 TokenType.SETTINGS, 378 TokenType.SHOW, 379 TokenType.TEMPORARY, 380 TokenType.TOP, 381 TokenType.TRUE, 382 TokenType.TRUNCATE, 383 TokenType.UNIQUE, 384 TokenType.UNPIVOT, 385 TokenType.UPDATE, 386 TokenType.USE, 387 TokenType.VOLATILE, 388 TokenType.WINDOW, 389 *CREATABLES, 390 *SUBQUERY_PREDICATES, 391 *TYPE_TOKENS, 392 *NO_PAREN_FUNCTIONS, 393 } 394 395 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 396 397 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 398 TokenType.ANTI, 399 TokenType.APPLY, 400 TokenType.ASOF, 401 TokenType.FULL, 402 TokenType.LEFT, 403 TokenType.LOCK, 404 TokenType.NATURAL, 405 TokenType.OFFSET, 406 TokenType.RIGHT, 407 TokenType.SEMI, 408 TokenType.WINDOW, 409 } 410 411 ALIAS_TOKENS = ID_VAR_TOKENS 412 413 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 414 415 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 416 417 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 418 419 FUNC_TOKENS 
= { 420 TokenType.COLLATE, 421 TokenType.COMMAND, 422 TokenType.CURRENT_DATE, 423 TokenType.CURRENT_DATETIME, 424 TokenType.CURRENT_TIMESTAMP, 425 TokenType.CURRENT_TIME, 426 TokenType.CURRENT_USER, 427 TokenType.FILTER, 428 TokenType.FIRST, 429 TokenType.FORMAT, 430 TokenType.GLOB, 431 TokenType.IDENTIFIER, 432 TokenType.INDEX, 433 TokenType.ISNULL, 434 TokenType.ILIKE, 435 TokenType.INSERT, 436 TokenType.LIKE, 437 TokenType.MERGE, 438 TokenType.OFFSET, 439 TokenType.PRIMARY_KEY, 440 TokenType.RANGE, 441 TokenType.REPLACE, 442 TokenType.RLIKE, 443 TokenType.ROW, 444 TokenType.UNNEST, 445 TokenType.VAR, 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.SEQUENCE, 449 TokenType.DATE, 450 TokenType.DATETIME, 451 TokenType.TABLE, 452 TokenType.TIMESTAMP, 453 TokenType.TIMESTAMPTZ, 454 TokenType.TRUNCATE, 455 TokenType.WINDOW, 456 TokenType.XOR, 457 *TYPE_TOKENS, 458 *SUBQUERY_PREDICATES, 459 } 460 461 CONJUNCTION = { 462 TokenType.AND: exp.And, 463 TokenType.OR: exp.Or, 464 } 465 466 EQUALITY = { 467 TokenType.COLON_EQ: exp.PropertyEQ, 468 TokenType.EQ: exp.EQ, 469 TokenType.NEQ: exp.NEQ, 470 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 471 } 472 473 COMPARISON = { 474 TokenType.GT: exp.GT, 475 TokenType.GTE: exp.GTE, 476 TokenType.LT: exp.LT, 477 TokenType.LTE: exp.LTE, 478 } 479 480 BITWISE = { 481 TokenType.AMP: exp.BitwiseAnd, 482 TokenType.CARET: exp.BitwiseXor, 483 TokenType.PIPE: exp.BitwiseOr, 484 } 485 486 TERM = { 487 TokenType.DASH: exp.Sub, 488 TokenType.PLUS: exp.Add, 489 TokenType.MOD: exp.Mod, 490 TokenType.COLLATE: exp.Collate, 491 } 492 493 FACTOR = { 494 TokenType.DIV: exp.IntDiv, 495 TokenType.LR_ARROW: exp.Distance, 496 TokenType.SLASH: exp.Div, 497 TokenType.STAR: exp.Mul, 498 } 499 500 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 501 502 TIMES = { 503 TokenType.TIME, 504 TokenType.TIMETZ, 505 } 506 507 TIMESTAMPS = { 508 TokenType.TIMESTAMP, 509 TokenType.TIMESTAMPTZ, 510 TokenType.TIMESTAMPLTZ, 511 *TIMES, 512 } 513 514 SET_OPERATIONS 
= { 515 TokenType.UNION, 516 TokenType.INTERSECT, 517 TokenType.EXCEPT, 518 } 519 520 JOIN_METHODS = { 521 TokenType.ASOF, 522 TokenType.NATURAL, 523 TokenType.POSITIONAL, 524 } 525 526 JOIN_SIDES = { 527 TokenType.LEFT, 528 TokenType.RIGHT, 529 TokenType.FULL, 530 } 531 532 JOIN_KINDS = { 533 TokenType.INNER, 534 TokenType.OUTER, 535 TokenType.CROSS, 536 TokenType.SEMI, 537 TokenType.ANTI, 538 } 539 540 JOIN_HINTS: t.Set[str] = set() 541 542 LAMBDAS = { 543 TokenType.ARROW: lambda self, expressions: self.expression( 544 exp.Lambda, 545 this=self._replace_lambda( 546 self._parse_conjunction(), 547 {node.name for node in expressions}, 548 ), 549 expressions=expressions, 550 ), 551 TokenType.FARROW: lambda self, expressions: self.expression( 552 exp.Kwarg, 553 this=exp.var(expressions[0].name), 554 expression=self._parse_conjunction(), 555 ), 556 } 557 558 COLUMN_OPERATORS = { 559 TokenType.DOT: None, 560 TokenType.DCOLON: lambda self, this, to: self.expression( 561 exp.Cast if self.STRICT_CAST else exp.TryCast, 562 this=this, 563 to=to, 564 ), 565 TokenType.ARROW: lambda self, this, path: self.expression( 566 exp.JSONExtract, 567 this=this, 568 expression=self.dialect.to_json_path(path), 569 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 570 ), 571 TokenType.DARROW: lambda self, this, path: self.expression( 572 exp.JSONExtractScalar, 573 this=this, 574 expression=self.dialect.to_json_path(path), 575 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 576 ), 577 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 578 exp.JSONBExtract, 579 this=this, 580 expression=path, 581 ), 582 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 583 exp.JSONBExtractScalar, 584 this=this, 585 expression=path, 586 ), 587 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 588 exp.JSONBContains, 589 this=this, 590 expression=key, 591 ), 592 } 593 594 EXPRESSION_PARSERS = { 595 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, 
TokenType.CLUSTER_BY), 596 exp.Column: lambda self: self._parse_column(), 597 exp.Condition: lambda self: self._parse_conjunction(), 598 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 599 exp.Expression: lambda self: self._parse_expression(), 600 exp.From: lambda self: self._parse_from(), 601 exp.Group: lambda self: self._parse_group(), 602 exp.Having: lambda self: self._parse_having(), 603 exp.Identifier: lambda self: self._parse_id_var(), 604 exp.Join: lambda self: self._parse_join(), 605 exp.Lambda: lambda self: self._parse_lambda(), 606 exp.Lateral: lambda self: self._parse_lateral(), 607 exp.Limit: lambda self: self._parse_limit(), 608 exp.Offset: lambda self: self._parse_offset(), 609 exp.Order: lambda self: self._parse_order(), 610 exp.Ordered: lambda self: self._parse_ordered(), 611 exp.Properties: lambda self: self._parse_properties(), 612 exp.Qualify: lambda self: self._parse_qualify(), 613 exp.Returning: lambda self: self._parse_returning(), 614 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 615 exp.Table: lambda self: self._parse_table_parts(), 616 exp.TableAlias: lambda self: self._parse_table_alias(), 617 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 618 exp.Where: lambda self: self._parse_where(), 619 exp.Window: lambda self: self._parse_named_window(), 620 exp.With: lambda self: self._parse_with(), 621 "JOIN_TYPE": lambda self: self._parse_join_parts(), 622 } 623 624 STATEMENT_PARSERS = { 625 TokenType.ALTER: lambda self: self._parse_alter(), 626 TokenType.BEGIN: lambda self: self._parse_transaction(), 627 TokenType.CACHE: lambda self: self._parse_cache(), 628 TokenType.COMMENT: lambda self: self._parse_comment(), 629 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 630 TokenType.CREATE: lambda self: self._parse_create(), 631 TokenType.DELETE: lambda self: self._parse_delete(), 632 TokenType.DESC: lambda self: self._parse_describe(), 633 TokenType.DESCRIBE: lambda self: 
self._parse_describe(), 634 TokenType.DROP: lambda self: self._parse_drop(), 635 TokenType.INSERT: lambda self: self._parse_insert(), 636 TokenType.KILL: lambda self: self._parse_kill(), 637 TokenType.LOAD: lambda self: self._parse_load(), 638 TokenType.MERGE: lambda self: self._parse_merge(), 639 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 640 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 641 TokenType.REFRESH: lambda self: self._parse_refresh(), 642 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 643 TokenType.SET: lambda self: self._parse_set(), 644 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 645 TokenType.UNCACHE: lambda self: self._parse_uncache(), 646 TokenType.UPDATE: lambda self: self._parse_update(), 647 TokenType.USE: lambda self: self.expression( 648 exp.Use, 649 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 650 this=self._parse_table(schema=False), 651 ), 652 } 653 654 UNARY_PARSERS = { 655 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 656 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 657 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 658 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 659 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 660 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 661 } 662 663 STRING_PARSERS = { 664 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 665 exp.RawString, this=token.text 666 ), 667 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 668 exp.National, this=token.text 669 ), 670 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 671 TokenType.STRING: lambda self, token: self.expression( 672 exp.Literal, 
this=token.text, is_string=True 673 ), 674 TokenType.UNICODE_STRING: lambda self, token: self.expression( 675 exp.UnicodeString, 676 this=token.text, 677 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 678 ), 679 } 680 681 NUMERIC_PARSERS = { 682 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 683 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 684 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 685 TokenType.NUMBER: lambda self, token: self.expression( 686 exp.Literal, this=token.text, is_string=False 687 ), 688 } 689 690 PRIMARY_PARSERS = { 691 **STRING_PARSERS, 692 **NUMERIC_PARSERS, 693 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 694 TokenType.NULL: lambda self, _: self.expression(exp.Null), 695 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 696 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 697 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 698 TokenType.STAR: lambda self, _: self.expression( 699 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 700 ), 701 } 702 703 PLACEHOLDER_PARSERS = { 704 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 705 TokenType.PARAMETER: lambda self: self._parse_parameter(), 706 TokenType.COLON: lambda self: ( 707 self.expression(exp.Placeholder, this=self._prev.text) 708 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 709 else None 710 ), 711 } 712 713 RANGE_PARSERS = { 714 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 715 TokenType.GLOB: binary_range_parser(exp.Glob), 716 TokenType.ILIKE: binary_range_parser(exp.ILike), 717 TokenType.IN: lambda self, this: self._parse_in(this), 718 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 719 TokenType.IS: lambda self, this: 
self._parse_is(this), 720 TokenType.LIKE: binary_range_parser(exp.Like), 721 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 722 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 723 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 724 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 725 } 726 727 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 728 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 729 "AUTO": lambda self: self._parse_auto_property(), 730 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 731 "BACKUP": lambda self: self.expression( 732 exp.BackupProperty, this=self._parse_var(any_token=True) 733 ), 734 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 735 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 736 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHECKSUM": lambda self: self._parse_checksum(), 738 "CLUSTER BY": lambda self: self._parse_cluster(), 739 "CLUSTERED": lambda self: self._parse_clustered_by(), 740 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 741 exp.CollateProperty, **kwargs 742 ), 743 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 744 "CONTAINS": lambda self: self._parse_contains_property(), 745 "COPY": lambda self: self._parse_copy_property(), 746 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 747 "DEFINER": lambda self: self._parse_definer(), 748 "DETERMINISTIC": lambda self: self.expression( 749 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 750 ), 751 "DISTKEY": lambda self: self._parse_distkey(), 752 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 753 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 754 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 755 
"EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 756 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 757 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 758 "FREESPACE": lambda self: self._parse_freespace(), 759 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 760 "HEAP": lambda self: self.expression(exp.HeapProperty), 761 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 762 "IMMUTABLE": lambda self: self.expression( 763 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 764 ), 765 "INHERITS": lambda self: self.expression( 766 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 767 ), 768 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 769 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 770 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 771 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 772 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 773 "LIKE": lambda self: self._parse_create_like(), 774 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 775 "LOCK": lambda self: self._parse_locking(), 776 "LOCKING": lambda self: self._parse_locking(), 777 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 778 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 779 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 780 "MODIFIES": lambda self: self._parse_modifies_property(), 781 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 782 "NO": lambda self: self._parse_no_property(), 783 "ON": lambda self: self._parse_on_property(), 784 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 785 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 786 "PARTITION": lambda self: 
self._parse_partitioned_of(), 787 "PARTITION BY": lambda self: self._parse_partitioned_by(), 788 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 789 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 790 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 791 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 792 "READS": lambda self: self._parse_reads_property(), 793 "REMOTE": lambda self: self._parse_remote_with_connection(), 794 "RETURNS": lambda self: self._parse_returns(), 795 "ROW": lambda self: self._parse_row(), 796 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 797 "SAMPLE": lambda self: self.expression( 798 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 799 ), 800 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 801 "SETTINGS": lambda self: self.expression( 802 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 803 ), 804 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 805 "SORTKEY": lambda self: self._parse_sortkey(), 806 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 807 "STABLE": lambda self: self.expression( 808 exp.StabilityProperty, this=exp.Literal.string("STABLE") 809 ), 810 "STORED": lambda self: self._parse_stored(), 811 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 812 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 813 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 814 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 815 "TO": lambda self: self._parse_to_table(), 816 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 817 "TRANSFORM": lambda self: self.expression( 818 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 819 ), 820 "TTL": lambda self: self._parse_ttl(), 821 "USING": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 822 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 823 "VOLATILE": lambda self: self._parse_volatile_property(), 824 "WITH": lambda self: self._parse_with_property(), 825 } 826 827 CONSTRAINT_PARSERS = { 828 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 829 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 830 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 831 "CHARACTER SET": lambda self: self.expression( 832 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 833 ), 834 "CHECK": lambda self: self.expression( 835 exp.CheckColumnConstraint, 836 this=self._parse_wrapped(self._parse_conjunction), 837 enforced=self._match_text_seq("ENFORCED"), 838 ), 839 "COLLATE": lambda self: self.expression( 840 exp.CollateColumnConstraint, this=self._parse_var() 841 ), 842 "COMMENT": lambda self: self.expression( 843 exp.CommentColumnConstraint, this=self._parse_string() 844 ), 845 "COMPRESS": lambda self: self._parse_compress(), 846 "CLUSTERED": lambda self: self.expression( 847 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 848 ), 849 "NONCLUSTERED": lambda self: self.expression( 850 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 851 ), 852 "DEFAULT": lambda self: self.expression( 853 exp.DefaultColumnConstraint, this=self._parse_bitwise() 854 ), 855 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 856 "EPHEMERAL": lambda self: self.expression( 857 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 858 ), 859 "EXCLUDE": lambda self: self.expression( 860 exp.ExcludeColumnConstraint, this=self._parse_index_params() 861 ), 862 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 863 "FORMAT": lambda self: self.expression( 864 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 865 ), 866 
"GENERATED": lambda self: self._parse_generated_as_identity(), 867 "IDENTITY": lambda self: self._parse_auto_increment(), 868 "INLINE": lambda self: self._parse_inline(), 869 "LIKE": lambda self: self._parse_create_like(), 870 "NOT": lambda self: self._parse_not_constraint(), 871 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 872 "ON": lambda self: ( 873 self._match(TokenType.UPDATE) 874 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 875 ) 876 or self.expression(exp.OnProperty, this=self._parse_id_var()), 877 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 878 "PERIOD": lambda self: self._parse_period_for_system_time(), 879 "PRIMARY KEY": lambda self: self._parse_primary_key(), 880 "REFERENCES": lambda self: self._parse_references(match=False), 881 "TITLE": lambda self: self.expression( 882 exp.TitleColumnConstraint, this=self._parse_var_or_string() 883 ), 884 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 885 "UNIQUE": lambda self: self._parse_unique(), 886 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 887 "WITH": lambda self: self.expression( 888 exp.Properties, expressions=self._parse_wrapped_properties() 889 ), 890 } 891 892 ALTER_PARSERS = { 893 "ADD": lambda self: self._parse_alter_table_add(), 894 "ALTER": lambda self: self._parse_alter_table_alter(), 895 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 896 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 897 "DROP": lambda self: self._parse_alter_table_drop(), 898 "RENAME": lambda self: self._parse_alter_table_rename(), 899 } 900 901 SCHEMA_UNNAMED_CONSTRAINTS = { 902 "CHECK", 903 "EXCLUDE", 904 "FOREIGN KEY", 905 "LIKE", 906 "PERIOD", 907 "PRIMARY KEY", 908 "UNIQUE", 909 } 910 911 NO_PAREN_FUNCTION_PARSERS = { 912 "ANY": lambda self: self.expression(exp.Any, 
this=self._parse_bitwise()), 913 "CASE": lambda self: self._parse_case(), 914 "IF": lambda self: self._parse_if(), 915 "NEXT": lambda self: self._parse_next_value_for(), 916 } 917 918 INVALID_FUNC_NAME_TOKENS = { 919 TokenType.IDENTIFIER, 920 TokenType.STRING, 921 } 922 923 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 924 925 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 926 927 FUNCTION_PARSERS = { 928 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 929 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 930 "DECODE": lambda self: self._parse_decode(), 931 "EXTRACT": lambda self: self._parse_extract(), 932 "JSON_OBJECT": lambda self: self._parse_json_object(), 933 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 934 "JSON_TABLE": lambda self: self._parse_json_table(), 935 "MATCH": lambda self: self._parse_match_against(), 936 "OPENJSON": lambda self: self._parse_open_json(), 937 "POSITION": lambda self: self._parse_position(), 938 "PREDICT": lambda self: self._parse_predict(), 939 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 940 "STRING_AGG": lambda self: self._parse_string_agg(), 941 "SUBSTRING": lambda self: self._parse_substring(), 942 "TRIM": lambda self: self._parse_trim(), 943 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 944 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 945 } 946 947 QUERY_MODIFIER_PARSERS = { 948 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 949 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 950 TokenType.WHERE: lambda self: ("where", self._parse_where()), 951 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 952 TokenType.HAVING: lambda self: ("having", self._parse_having()), 953 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 954 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 955 TokenType.ORDER_BY: lambda 
self: ("order", self._parse_order()), 956 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 957 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 958 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 959 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 960 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 961 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 962 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 963 TokenType.CLUSTER_BY: lambda self: ( 964 "cluster", 965 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 966 ), 967 TokenType.DISTRIBUTE_BY: lambda self: ( 968 "distribute", 969 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 970 ), 971 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 972 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 973 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 974 } 975 976 SET_PARSERS = { 977 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 978 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 979 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 980 "TRANSACTION": lambda self: self._parse_set_transaction(), 981 } 982 983 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 984 985 TYPE_LITERAL_PARSERS = { 986 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 987 } 988 989 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 990 991 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 992 993 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 994 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 995 "ISOLATION": ( 996 ("LEVEL", "REPEATABLE", "READ"), 997 ("LEVEL", "READ", "COMMITTED"), 998 ("LEVEL", "READ", "UNCOMITTED"), 999 ("LEVEL", 
"SERIALIZABLE"), 1000 ), 1001 "READ": ("WRITE", "ONLY"), 1002 } 1003 1004 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1005 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1006 ) 1007 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1008 1009 CREATE_SEQUENCE: OPTIONS_TYPE = { 1010 "SCALE": ("EXTEND", "NOEXTEND"), 1011 "SHARD": ("EXTEND", "NOEXTEND"), 1012 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1013 **dict.fromkeys( 1014 ( 1015 "SESSION", 1016 "GLOBAL", 1017 "KEEP", 1018 "NOKEEP", 1019 "ORDER", 1020 "NOORDER", 1021 "NOCACHE", 1022 "CYCLE", 1023 "NOCYCLE", 1024 "NOMINVALUE", 1025 "NOMAXVALUE", 1026 "NOSCALE", 1027 "NOSHARD", 1028 ), 1029 tuple(), 1030 ), 1031 } 1032 1033 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1034 1035 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1036 1037 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1038 1039 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1040 1041 CLONE_KEYWORDS = {"CLONE", "COPY"} 1042 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1043 1044 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1045 1046 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1047 1048 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1049 1050 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1051 1052 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1053 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1054 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1055 1056 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1057 1058 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1059 1060 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1061 1062 DISTINCT_TOKENS = {TokenType.DISTINCT} 1063 1064 
    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: The number of characters of SQL context shown around an error.
            max_errors: The maximum number of error messages concatenated into one ParseError.
            dialect: The dialect (name or instance) this parser belongs to.
        """
        # Imported here to avoid a circular import between parser and dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all per-parse state so this instance can be reused for another token list."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for one of the given expression types.
            ParseError: If the token list could not be parsed into any of the given types.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # None of the candidate types parsed successfully; surface all collected errors
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Runs `parse_method` once per semicolon-separated statement in `raw_tokens`.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        # One chunk per statement; semicolon tokens are dropped and start a new chunk,
        # except for a trailing semicolon, which would otherwise create an empty chunk
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined with ANSI escape codes
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
1296 kwargs: The arguments to set for the expression along with their respective values. 1297 1298 Returns: 1299 The target expression. 1300 """ 1301 instance = exp_class(**kwargs) 1302 instance.add_comments(comments) if comments else self._add_comments(instance) 1303 return self.validate_expression(instance) 1304 1305 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1306 if expression and self._prev_comments: 1307 expression.add_comments(self._prev_comments) 1308 self._prev_comments = None 1309 1310 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1311 """ 1312 Validates an Expression, making sure that all its mandatory arguments are set. 1313 1314 Args: 1315 expression: The expression to validate. 1316 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1317 1318 Returns: 1319 The validated expression. 1320 """ 1321 if self.error_level != ErrorLevel.IGNORE: 1322 for error_message in expression.error_messages(args): 1323 self.raise_error(error_message) 1324 1325 return expression 1326 1327 def _find_sql(self, start: Token, end: Token) -> str: 1328 return self.sql[start.start : end.end + 1] 1329 1330 def _is_connected(self) -> bool: 1331 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1332 1333 def _advance(self, times: int = 1) -> None: 1334 self._index += times 1335 self._curr = seq_get(self._tokens, self._index) 1336 self._next = seq_get(self._tokens, self._index + 1) 1337 1338 if self._index > 0: 1339 self._prev = self._tokens[self._index - 1] 1340 self._prev_comments = self._prev.comments 1341 else: 1342 self._prev = None 1343 self._prev_comments = None 1344 1345 def _retreat(self, index: int) -> None: 1346 if index != self._index: 1347 self._advance(index - self._index) 1348 1349 def _warn_unsupported(self) -> None: 1350 if len(self._tokens) <= 1: 1351 return 1352 1353 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1354 # interested in emitting a warning for the one being currently processed. 1355 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1356 1357 logger.warning( 1358 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1359 ) 1360 1361 def _parse_command(self) -> exp.Command: 1362 self._warn_unsupported() 1363 return self.expression( 1364 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1365 ) 1366 1367 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1368 """ 1369 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1370 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1371 the parser state accordingly 1372 """ 1373 index = self._index 1374 error_level = self.error_level 1375 1376 self.error_level = ErrorLevel.IMMEDIATE 1377 try: 1378 this = parse_method() 1379 except ParseError: 1380 this = None 1381 finally: 1382 if not this or retreat: 1383 self._retreat(index) 1384 self.error_level = error_level 1385 1386 return this 1387 1388 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1389 start = self._prev 1390 exists = self._parse_exists() if allow_exists else None 1391 1392 self._match(TokenType.ON) 1393 1394 materialized = self._match_text_seq("MATERIALIZED") 1395 kind = self._match_set(self.CREATABLES) and self._prev 1396 if not kind: 1397 return self._parse_as_command(start) 1398 1399 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1400 this = self._parse_user_defined_function(kind=kind.token_type) 1401 elif kind.token_type == TokenType.TABLE: 1402 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1403 elif kind.token_type == TokenType.COLUMN: 1404 this = self._parse_column() 1405 else: 1406 this = self._parse_id_var() 1407 1408 
        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        # Parses a (possibly schema-qualified) table reference into a ToTableProperty.
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action keyword
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the leading token,
        # fall back to command parsing, then to a plain expression/SELECT.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown DROP target: preserve the statement as an opaque command
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only when the full sequence matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token and treat it as CREATE FUNCTION
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Any unconsumed tokens mean we didn't fully understand the statement
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # If the cursor didn't move, nothing sequence-related was found
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum":
self._match_texts(("MIN", "MINIMUM")), 1709 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1710 } 1711 1712 if self._match_texts(self.PROPERTY_PARSERS): 1713 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1714 try: 1715 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1716 except TypeError: 1717 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1718 1719 return None 1720 1721 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1722 return self._parse_wrapped_csv(self._parse_property) 1723 1724 def _parse_property(self) -> t.Optional[exp.Expression]: 1725 if self._match_texts(self.PROPERTY_PARSERS): 1726 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1727 1728 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1729 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1730 1731 if self._match_text_seq("COMPOUND", "SORTKEY"): 1732 return self._parse_sortkey(compound=True) 1733 1734 if self._match_text_seq("SQL", "SECURITY"): 1735 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1736 1737 index = self._index 1738 key = self._parse_column() 1739 1740 if not self._match(TokenType.EQ): 1741 self._retreat(index) 1742 return self._parse_sequence_properties() 1743 1744 return self.expression( 1745 exp.Property, 1746 this=key.to_dot() if isinstance(key, exp.Column) else key, 1747 value=self._parse_bitwise() or self._parse_var(any_token=True), 1748 ) 1749 1750 def _parse_stored(self) -> exp.FileFormatProperty: 1751 self._match(TokenType.ALIAS) 1752 1753 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1754 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1755 1756 return self.expression( 1757 exp.FileFormatProperty, 1758 this=( 1759 self.expression( 1760 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1761 ) 1762 if 
input_format or output_format 1763 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1764 ), 1765 ) 1766 1767 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1768 self._match(TokenType.EQ) 1769 self._match(TokenType.ALIAS) 1770 field = self._parse_field() 1771 if isinstance(field, exp.Identifier) and not field.quoted: 1772 field = exp.var(field) 1773 1774 return self.expression(exp_class, this=field, **kwargs) 1775 1776 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1777 properties = [] 1778 while True: 1779 if before: 1780 prop = self._parse_property_before() 1781 else: 1782 prop = self._parse_property() 1783 if not prop: 1784 break 1785 for p in ensure_list(prop): 1786 properties.append(p) 1787 1788 if properties: 1789 return self.expression(exp.Properties, expressions=properties) 1790 1791 return None 1792 1793 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1794 return self.expression( 1795 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1796 ) 1797 1798 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1799 if self._index >= 2: 1800 pre_volatile_token = self._tokens[self._index - 2] 1801 else: 1802 pre_volatile_token = None 1803 1804 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1805 return exp.VolatileProperty() 1806 1807 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1808 1809 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1810 self._match_pair(TokenType.EQ, TokenType.ON) 1811 1812 prop = self.expression(exp.WithSystemVersioningProperty) 1813 if self._match(TokenType.L_PAREN): 1814 self._match_text_seq("HISTORY_TABLE", "=") 1815 prop.set("this", self._parse_table_parts()) 1816 1817 if self._match(TokenType.COMMA): 1818 
self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1819 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1820 1821 self._match_r_paren() 1822 1823 return prop 1824 1825 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1826 if self._match(TokenType.L_PAREN, advance=False): 1827 return self._parse_wrapped_properties() 1828 1829 if self._match_text_seq("JOURNAL"): 1830 return self._parse_withjournaltable() 1831 1832 if self._match_texts(self.VIEW_ATTRIBUTES): 1833 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1834 1835 if self._match_text_seq("DATA"): 1836 return self._parse_withdata(no=False) 1837 elif self._match_text_seq("NO", "DATA"): 1838 return self._parse_withdata(no=True) 1839 1840 if not self._next: 1841 return None 1842 1843 return self._parse_withisolatedloading() 1844 1845 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1846 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1847 self._match(TokenType.EQ) 1848 1849 user = self._parse_id_var() 1850 self._match(TokenType.PARAMETER) 1851 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1852 1853 if not user or not host: 1854 return None 1855 1856 return exp.DefinerProperty(this=f"{user}@{host}") 1857 1858 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1859 self._match(TokenType.TABLE) 1860 self._match(TokenType.EQ) 1861 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1862 1863 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1864 return self.expression(exp.LogProperty, no=no) 1865 1866 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1867 return self.expression(exp.JournalProperty, **kwargs) 1868 1869 def _parse_checksum(self) -> exp.ChecksumProperty: 1870 self._match(TokenType.EQ) 1871 1872 on = None 1873 if self._match(TokenType.ON): 1874 on = True 1875 elif self._match_text_seq("OFF"): 
1876 on = False 1877 1878 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1879 1880 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1881 return self.expression( 1882 exp.Cluster, 1883 expressions=( 1884 self._parse_wrapped_csv(self._parse_ordered) 1885 if wrapped 1886 else self._parse_csv(self._parse_ordered) 1887 ), 1888 ) 1889 1890 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1891 self._match_text_seq("BY") 1892 1893 self._match_l_paren() 1894 expressions = self._parse_csv(self._parse_column) 1895 self._match_r_paren() 1896 1897 if self._match_text_seq("SORTED", "BY"): 1898 self._match_l_paren() 1899 sorted_by = self._parse_csv(self._parse_ordered) 1900 self._match_r_paren() 1901 else: 1902 sorted_by = None 1903 1904 self._match(TokenType.INTO) 1905 buckets = self._parse_number() 1906 self._match_text_seq("BUCKETS") 1907 1908 return self.expression( 1909 exp.ClusteredByProperty, 1910 expressions=expressions, 1911 sorted_by=sorted_by, 1912 buckets=buckets, 1913 ) 1914 1915 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1916 if not self._match_text_seq("GRANTS"): 1917 self._retreat(self._index - 1) 1918 return None 1919 1920 return self.expression(exp.CopyGrantsProperty) 1921 1922 def _parse_freespace(self) -> exp.FreespaceProperty: 1923 self._match(TokenType.EQ) 1924 return self.expression( 1925 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1926 ) 1927 1928 def _parse_mergeblockratio( 1929 self, no: bool = False, default: bool = False 1930 ) -> exp.MergeBlockRatioProperty: 1931 if self._match(TokenType.EQ): 1932 return self.expression( 1933 exp.MergeBlockRatioProperty, 1934 this=self._parse_number(), 1935 percent=self._match(TokenType.PERCENT), 1936 ) 1937 1938 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1939 1940 def _parse_datablocksize( 1941 self, 1942 default: t.Optional[bool] = None, 1943 
    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION [=] {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP(...)] (Teradata)."""
        self._match(TokenType.EQ)
        # At most one of these flags will match; the rest stay False
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [FOR ...] (Teradata), backtracking on no match."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            # NO/CONCURRENT may have been consumed speculatively — restore the cursor
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata LOCKING clause: kind, optional target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks have no named target; the others reference an object
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY <expr, ...>; returns an empty list when the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []
    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <parent> {DEFAULT | FOR VALUES ...} (Postgres declarative partitioning)."""
        if not self._match_text_seq("OF"):
            # PARTITION was consumed by the caller; give it back
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] <schema or bracketed field>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the tail of WITH [NO] DATA [AND [NO] STATISTICS] (Teradata)."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL keyword after CONTAINS, producing a CONTAINS SQL routine property."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse SQL DATA after MODIFIES, producing a MODIFIES SQL DATA routine property."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse the token(s) after NO: PRIMARY INDEX or SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse the clause after ON: COMMIT PRESERVE/DELETE ROWS, or a generic ON <target>."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse SQL DATA after READS, producing a READS SQL DATA routine property."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<column>) (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [{INCLUDING | EXCLUDING} <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                # INCLUDING/EXCLUDING without an option name — treat the whole clause as unparseable
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY(<cols>) (Redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <name>; `default` marks a DEFAULT CHARACTER SET clause."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )
    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <connection parts> (BigQuery ML)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: either a scalar type or TABLE [<...>] / TABLE (schema)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<...> generic syntax — struct-like column list between < and >
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] [EXTENDED | FORMATTED | HISTORY] <table> [properties]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if not self._match_set(self.ID_VAR_TOKENS, advance=False):
            # The "style" word was actually the table name — undo and reparse it as such
            style = None
            self._retreat(self._index - 1)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (including OVERWRITE/IGNORE/DIRECTORY/OR <alt> variants)."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. SQLite INSERT OR REPLACE/ABORT/...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may legally appear before or after the source expression
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id> (MySQL)."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <expr, ...> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the clause after ROW; only ROW FORMAT ... is recognized."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause: SERDE '<class>' [...] or DELIMITED [...] options.

        When `match_row` is True, the leading ROW FORMAT tokens must be consumed here.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional but order-sensitive in Hive's grammar
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... (Hive); any other LOAD falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE depending on the dialect
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # Only a single ('key' = 'value') pair is consumed here
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<expr, ...>)."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single row of a VALUES clause as a Tuple, parenthesized or bare."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list; overridable hook for dialects."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: optional CTEs, the SELECT core (or a parenthesized /
        VALUES / leading-FROM form), query modifiers, and trailing set operations.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may be a no-op under lenient error levels, hence the fallback return
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery SELECT AS STRUCT / AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] <cte, ...>; tolerates a repeated WITH between CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(col, ...)]; returns None when neither alias nor columns appear."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty column list means the paren wasn't part of the alias — backtrack
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node, consuming trailing pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to earlier sources as explicit UNNEST calls
        (e.g. BigQuery's `FROM t, t.arr` form)."""
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        # Names of sources seen so far; a later join target matching one of these
        # is really a column reference to be unnested
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals, and clause modifiers (WHERE, GROUP BY, LIMIT, ...) to `this`."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an embedded OFFSET (and ClickHouse LIMIT BY
                            # expressions), which belong on a separate Offset node
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

            if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
                this = self._implicit_unnests_to_explicit(this)

        return this
    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse INTO [TEMPORARY | UNLOGGED] [TABLE] <table> (SELECT INTO)."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` when the FROM keyword was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry: [FINAL | RUNNING] <expression>."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause: partitioning, ordering, measures, row
        semantics, AFTER MATCH SKIP, PATTERN, DEFINE, and a trailing alias."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is a regex-like token soup; capture its raw SQL by
            # scanning to the balanced closing paren
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional (method, side, kind) tokens preceding JOIN, in that order."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join: comma join, [method/side/kind] JOIN, or OUTER/CROSS APPLY,
        plus its ON/USING/MATCH_CONDITION clause."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The matched qualifiers weren't followed by JOIN — undo them
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # Handle nested joins like "a JOIN b JOIN c ON ..." where the ON binds
            # to the outer join; speculatively parse trailing joins and backtrack
            # if no ON/USING follows them
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class name."""
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this
        """Parse the trailing parameters of a CREATE INDEX statement (USING,
        column list, INCLUDE, PARTITION BY, WITH storage options, tablespace, WHERE)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        When `index` is given, the index name is already known and only the
        ON <table> part plus parameters is parsed; otherwise the UNIQUE /
        PRIMARY / AMP modifiers and the INDEX keyword are expected first.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints, if present."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single component of a (possibly dotted) table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a dotted table reference into an exp.Table with catalog/db parts."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            # A trailing "*" merges into the identifier (e.g. db.tbl*)
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            # Shift the parts: the last component names a database, not a table
            catalog = db
            db = table
            table = None

        if not table and
not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery or a plain
        table reference, together with its alias, hints, pivots, sample, etc."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place the alias after TABLESAMPLE, so the sample must be
        # parsed first in that case (the alternative branch is further below)
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal table clause (FOR TIMESTAMP/VERSION AS OF, BETWEEN,
        CONTAINED IN, ALL) into an exp.Version, or return None."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression,
kind=kind) 3248 3249 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3250 if not self._match(TokenType.UNNEST): 3251 return None 3252 3253 expressions = self._parse_wrapped_csv(self._parse_equality) 3254 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3255 3256 alias = self._parse_table_alias() if with_alias else None 3257 3258 if alias: 3259 if self.dialect.UNNEST_COLUMN_ONLY: 3260 if alias.args.get("columns"): 3261 self.raise_error("Unexpected extra column alias in unnest.") 3262 3263 alias.set("columns", [alias.this]) 3264 alias.set("this", None) 3265 3266 columns = alias.args.get("columns") or [] 3267 if offset and len(expressions) < len(columns): 3268 offset = columns.pop() 3269 3270 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3271 self._match(TokenType.ALIAS) 3272 offset = self._parse_id_var( 3273 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3274 ) or exp.to_identifier("offset") 3275 3276 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3277 3278 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3279 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3280 if not is_derived and not self._match_text_seq("VALUES"): 3281 return None 3282 3283 expressions = self._parse_csv(self._parse_value) 3284 alias = self._parse_table_alias() 3285 3286 if is_derived: 3287 self._match_r_paren() 3288 3289 return self.expression( 3290 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3291 ) 3292 3293 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3294 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3295 as_modifier and self._match_text_seq("USING", "SAMPLE") 3296 ): 3297 return None 3298 3299 bucket_numerator = None 3300 bucket_denominator = None 3301 bucket_field = None 3302 percent = None 3303 size = None 3304 seed = None 3305 3306 method = 
self._parse_var(tokens=(TokenType.ROW,), upper=True) 3307 matched_l_paren = self._match(TokenType.L_PAREN) 3308 3309 if self.TABLESAMPLE_CSV: 3310 num = None 3311 expressions = self._parse_csv(self._parse_primary) 3312 else: 3313 expressions = None 3314 num = ( 3315 self._parse_factor() 3316 if self._match(TokenType.NUMBER, advance=False) 3317 else self._parse_primary() or self._parse_placeholder() 3318 ) 3319 3320 if self._match_text_seq("BUCKET"): 3321 bucket_numerator = self._parse_number() 3322 self._match_text_seq("OUT", "OF") 3323 bucket_denominator = bucket_denominator = self._parse_number() 3324 self._match(TokenType.ON) 3325 bucket_field = self._parse_field() 3326 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3327 percent = num 3328 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3329 size = num 3330 else: 3331 percent = num 3332 3333 if matched_l_paren: 3334 self._match_r_paren() 3335 3336 if self._match(TokenType.L_PAREN): 3337 method = self._parse_var(upper=True) 3338 seed = self._match(TokenType.COMMA) and self._parse_number() 3339 self._match_r_paren() 3340 elif self._match_texts(("SEED", "REPEATABLE")): 3341 seed = self._parse_wrapped(self._parse_number) 3342 3343 return self.expression( 3344 exp.TableSample, 3345 expressions=expressions, 3346 method=method, 3347 bucket_numerator=bucket_numerator, 3348 bucket_denominator=bucket_denominator, 3349 bucket_field=bucket_field, 3350 percent=percent, 3351 size=size, 3352 seed=seed, 3353 ) 3354 3355 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3356 return list(iter(self._parse_pivot, None)) or None 3357 3358 def _parse_joins(self) -> t.Iterator[exp.Join]: 3359 return iter(self._parse_join, None) 3360 3361 # https://duckdb.org/docs/sql/statements/pivot 3362 def _parse_simplified_pivot(self) -> exp.Pivot: 3363 def _parse_on() -> t.Optional[exp.Expression]: 3364 this = self._parse_bitwise() 3365 return self._parse_in(this) if self._match(TokenType.IN) 
else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `FOR col IN (val [AS alias], ...)` part of a PIVOT clause."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            # One IN-list element, optionally aliased (val AS alias)
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse one PIVOT/UNPIVOT clause, computing the output column names
        for PIVOT per the dialect's naming settings."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause: rewind past PIVOT/UNPIVOT
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # An alias may only follow the last pivot clause
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    # Combine the aggregation name with each IN-list value,
                    # prefix- or suffix-style depending on the dialect
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each PIVOT aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse-style PREWHERE clause, or return None."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or return None when absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including ALL/DISTINCT, GROUPING SETS,
        ROLLUP, CUBE and WITH TOTALS modifiers."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] =
defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; bare ROLLUP takes one
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # WITH was not followed by a modifier; give it back
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...), or return None when absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or return None when absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or return None when absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle's START WITH ... CONNECT BY hierarchy clause
        (START WITH may appear on either side of CONNECT BY)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only a function while parsing the CONNECT BY condition,
        # so it is registered temporarily and removed right after
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `name AS expr` into an Alias node (used by INTERPOLATE)."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a ClickHouse-style INTERPOLATE (...) list, or return None."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged
        when no ordering clause is present."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not
self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term with ASC/DESC, NULLS FIRST/LAST and WITH FILL,
        applying the dialect's default null ordering when none is given."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # "(asc and False)" is always falsy, so desc is True only when DESC matched
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # No explicit NULLS ordering: derive it from the dialect's NULL_ORDERING
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] n, T-SQL TOP (n), or FETCH FIRST/NEXT ...;
        returns `this` unchanged when none of these is present."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may parenthesize an arbitrary term; bare TOP takes a number
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL LIMIT offset, count: the first term was the offset
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's LIMIT ... BY expression list, if present."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE) with optional OF list and wait behavior."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if
self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT clauses onto `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL is given explicitly
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Move trailing modifiers (e.g. ORDER BY/LIMIT) from the last
                # SELECT up to the union node itself
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR conjunctions (lowest precedence in this chain)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators over range expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (IN, BETWEEN, LIKE, ...), ISNULL/NOTNULL
        and IS tests, including NOT negation."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of IS: [NOT] DISTINCT FROM, NULL or a boolean."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all; rewind to before IS
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST, a (sub)query or value list, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                # IN (SELECT ...): store the query as a subquery, not a value list
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `BETWEEN low AND high` for the given left-hand expression."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an Escape node when an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing it to the canonical
        `INTERVAL '<value>' <unit>` form, with optional `TO <unit>` spans."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare "IS" after INTERVAL means this wasn't an interval literal
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, ||, ?? and << / >> shift pairs over terms."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # "??" is parsed as a two-argument COALESCE
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators, tagging Div nodes with the
        dialect's typed/safe division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else
self._parse_unary 3963 this = parse_method() 3964 3965 while self._match_set(self.FACTOR): 3966 this = self.expression( 3967 self.FACTOR[self._prev.token_type], 3968 this=this, 3969 comments=self._prev_comments, 3970 expression=parse_method(), 3971 ) 3972 if isinstance(this, exp.Div): 3973 this.args["typed"] = self.dialect.TYPED_DIVISION 3974 this.args["safe"] = self.dialect.SAFE_DIVISION 3975 3976 return this 3977 3978 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3979 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3980 3981 def _parse_unary(self) -> t.Optional[exp.Expression]: 3982 if self._match_set(self.UNARY_PARSERS): 3983 return self.UNARY_PARSERS[self._prev.token_type](self) 3984 return self._parse_at_time_zone(self._parse_type()) 3985 3986 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3987 interval = parse_interval and self._parse_interval() 3988 if interval: 3989 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3990 while True: 3991 index = self._index 3992 self._match(TokenType.PLUS) 3993 3994 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3995 self._retreat(index) 3996 break 3997 3998 interval = self.expression( # type: ignore 3999 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4000 ) 4001 4002 return interval 4003 4004 index = self._index 4005 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4006 this = self._parse_column() 4007 4008 if data_type: 4009 if isinstance(this, exp.Literal): 4010 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4011 if parser: 4012 return parser(self, this, data_type) 4013 return self.expression(exp.Cast, this=this, to=data_type) 4014 if not data_type.expressions: 4015 self._retreat(index) 4016 return self._parse_column() 4017 return self._parse_column_ops(data_type) 4018 4019 return this and self._parse_column_ops(this) 4020 4021 def 
_parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4022 this = self._parse_type() 4023 if not this: 4024 return None 4025 4026 if isinstance(this, exp.Column) and not this.table: 4027 this = exp.var(this.name.upper()) 4028 4029 return self.expression( 4030 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4031 ) 4032 4033 def _parse_types( 4034 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4035 ) -> t.Optional[exp.Expression]: 4036 index = self._index 4037 4038 prefix = self._match_text_seq("SYSUDTLIB", ".") 4039 4040 if not self._match_set(self.TYPE_TOKENS): 4041 identifier = allow_identifiers and self._parse_id_var( 4042 any_token=False, tokens=(TokenType.VAR,) 4043 ) 4044 if identifier: 4045 tokens = self.dialect.tokenize(identifier.name) 4046 4047 if len(tokens) != 1: 4048 self.raise_error("Unexpected identifier", self._prev) 4049 4050 if tokens[0].token_type in self.TYPE_TOKENS: 4051 self._prev = tokens[0] 4052 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4053 type_name = identifier.name 4054 4055 while self._match(TokenType.DOT): 4056 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4057 4058 return exp.DataType.build(type_name, udt=True) 4059 else: 4060 self._retreat(self._index - 1) 4061 return None 4062 else: 4063 return None 4064 4065 type_token = self._prev.token_type 4066 4067 if type_token == TokenType.PSEUDO_TYPE: 4068 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4069 4070 if type_token == TokenType.OBJECT_IDENTIFIER: 4071 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4072 4073 nested = type_token in self.NESTED_TYPE_TOKENS 4074 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4075 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4076 expressions = None 4077 maybe_func = False 4078 4079 if self._match(TokenType.L_PAREN): 4080 if is_struct: 4081 expressions = self._parse_csv(self._parse_struct_types) 
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(func, type, ...): first arg is the
                # aggregate function name, the rest are types
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parameterized type after all — rewind everything
                self._retreat(index)
                return None

            # A parenthesized param list could also have been a function call, e.g. DATE(x)
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket style nesting, e.g. ARRAY<INT>, STRUCT<a INT, b STRING>
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Optional literal values following the type, e.g. ARRAY<INT>[1, 2]
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                # WITHOUT TIME ZONE is the default — consume it but keep the plain type
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string literal follows, so this was a function call, not a type
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                # raise_error may be suppressed depending on error level, hence the fallback
                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Postfix array syntax, e.g. INT[][] — each [] wraps the type in another ARRAY
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member, e.g. ``a INT`` or ``a: INT`` (or a bare type)."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # No column type was actually parsed — re-read the whole member as a type
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone if an ``AT TIME ZONE`` clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with any trailing column operators/brackets."""
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a field and normalize a bare Identifier into a Column node."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by ( is an identifier here, not the VALUES clause
            this = self._parse_id_var()

        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators: ``::`` casts, dots, and dialect-specific ops."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                # Rewrite the accumulated Column prefix into a Dot chain so the function
                # name keeps its full qualification
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: previous name parts become table/db/catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, ``.5``-style number, or parenthesized expr/query."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. .25 -> 0.25
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary, function call, or identifier.

        ``anonymous_func`` flips the primary/function precedence so that a name
        followed by parens is preferred as a function call.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including the ODBC ``{fn <function>}`` escape wrapper."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation proper.

        Dispatch order: no-paren special forms, registered FUNCTION_PARSERS, subquery
        predicates (EXISTS/ANY...), then known builders from ``functions`` (defaulting to
        ``self.FUNCTIONS``), finally an Anonymous function node.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if not any_token and token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                # Convert aliased args (k AS v / k = v) into PropertyEQ nodes
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original spelling of the function name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style args (Alias / EQ / etc.) into PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap the Column so the key is a plain identifier
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter as a column definition (name + optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. MySQL ``_utf8'...'``) or fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as ``kind.name``."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (``(a, b) -> ...`` / ``a -> ...``) or a plain argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse as a regular (possibly DISTINCT) argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions) attached to `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g.
in INSERT INTO table (<expr>), 4534 # expr can be of both types 4535 if self._match_set(self.SELECT_START_TOKENS): 4536 self._retreat(index) 4537 return this 4538 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4539 self._match_r_paren() 4540 return self.expression(exp.Schema, this=this, expressions=args) 4541 4542 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4543 return self._parse_column_def(self._parse_field(any_token=True)) 4544 4545 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4546 # column defs are not really columns, they're identifiers 4547 if isinstance(this, exp.Column): 4548 this = this.this 4549 4550 kind = self._parse_types(schema=True) 4551 4552 if self._match_text_seq("FOR", "ORDINALITY"): 4553 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4554 4555 constraints: t.List[exp.Expression] = [] 4556 4557 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4558 ("ALIAS", "MATERIALIZED") 4559 ): 4560 persisted = self._prev.text.upper() == "MATERIALIZED" 4561 constraints.append( 4562 self.expression( 4563 exp.ComputedColumnConstraint, 4564 this=self._parse_conjunction(), 4565 persisted=persisted or self._match_text_seq("PERSISTED"), 4566 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4567 ) 4568 ) 4569 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4570 self._match(TokenType.ALIAS) 4571 constraints.append( 4572 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4573 ) 4574 4575 while True: 4576 constraint = self._parse_column_constraint() 4577 if not constraint: 4578 break 4579 constraints.append(constraint) 4580 4581 if not kind and not constraints: 4582 return this 4583 4584 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4585 4586 def _parse_auto_increment( 4587 self, 4588 ) -> exp.GeneratedAsIdentityColumnConstraint | 
exp.AutoIncrementColumnConstraint: 4589 start = None 4590 increment = None 4591 4592 if self._match(TokenType.L_PAREN, advance=False): 4593 args = self._parse_wrapped_csv(self._parse_bitwise) 4594 start = seq_get(args, 0) 4595 increment = seq_get(args, 1) 4596 elif self._match_text_seq("START"): 4597 start = self._parse_bitwise() 4598 self._match_text_seq("INCREMENT") 4599 increment = self._parse_bitwise() 4600 4601 if start and increment: 4602 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4603 4604 return exp.AutoIncrementColumnConstraint() 4605 4606 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4607 if not self._match_text_seq("REFRESH"): 4608 self._retreat(self._index - 1) 4609 return None 4610 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4611 4612 def _parse_compress(self) -> exp.CompressColumnConstraint: 4613 if self._match(TokenType.L_PAREN, advance=False): 4614 return self.expression( 4615 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4616 ) 4617 4618 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4619 4620 def _parse_generated_as_identity( 4621 self, 4622 ) -> ( 4623 exp.GeneratedAsIdentityColumnConstraint 4624 | exp.ComputedColumnConstraint 4625 | exp.GeneratedAsRowColumnConstraint 4626 ): 4627 if self._match_text_seq("BY", "DEFAULT"): 4628 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4629 this = self.expression( 4630 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4631 ) 4632 else: 4633 self._match_text_seq("ALWAYS") 4634 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4635 4636 self._match(TokenType.ALIAS) 4637 4638 if self._match_text_seq("ROW"): 4639 start = self._match_text_seq("START") 4640 if not start: 4641 self._match(TokenType.END) 4642 hidden = self._match_text_seq("HIDDEN") 4643 return 
self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4644 4645 identity = self._match_text_seq("IDENTITY") 4646 4647 if self._match(TokenType.L_PAREN): 4648 if self._match(TokenType.START_WITH): 4649 this.set("start", self._parse_bitwise()) 4650 if self._match_text_seq("INCREMENT", "BY"): 4651 this.set("increment", self._parse_bitwise()) 4652 if self._match_text_seq("MINVALUE"): 4653 this.set("minvalue", self._parse_bitwise()) 4654 if self._match_text_seq("MAXVALUE"): 4655 this.set("maxvalue", self._parse_bitwise()) 4656 4657 if self._match_text_seq("CYCLE"): 4658 this.set("cycle", True) 4659 elif self._match_text_seq("NO", "CYCLE"): 4660 this.set("cycle", False) 4661 4662 if not identity: 4663 this.set("expression", self._parse_bitwise()) 4664 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4665 args = self._parse_csv(self._parse_bitwise) 4666 this.set("start", seq_get(args, 0)) 4667 this.set("increment", seq_get(args, 1)) 4668 4669 self._match_r_paren() 4670 4671 return this 4672 4673 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4674 self._match_text_seq("LENGTH") 4675 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4676 4677 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4678 if self._match_text_seq("NULL"): 4679 return self.expression(exp.NotNullColumnConstraint) 4680 if self._match_text_seq("CASESPECIFIC"): 4681 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4682 if self._match_text_seq("FOR", "REPLICATION"): 4683 return self.expression(exp.NotForReplicationColumnConstraint) 4684 return None 4685 4686 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4687 if self._match(TokenType.CONSTRAINT): 4688 this = self._parse_id_var() 4689 else: 4690 this = None 4691 4692 if self._match_texts(self.CONSTRAINT_PARSERS): 4693 return self.expression( 4694 exp.ColumnConstraint, 4695 this=this, 4696 
kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4697 ) 4698 4699 return this 4700 4701 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4702 if not self._match(TokenType.CONSTRAINT): 4703 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4704 4705 return self.expression( 4706 exp.Constraint, 4707 this=self._parse_id_var(), 4708 expressions=self._parse_unnamed_constraints(), 4709 ) 4710 4711 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4712 constraints = [] 4713 while True: 4714 constraint = self._parse_unnamed_constraint() or self._parse_function() 4715 if not constraint: 4716 break 4717 constraints.append(constraint) 4718 4719 return constraints 4720 4721 def _parse_unnamed_constraint( 4722 self, constraints: t.Optional[t.Collection[str]] = None 4723 ) -> t.Optional[exp.Expression]: 4724 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4725 constraints or self.CONSTRAINT_PARSERS 4726 ): 4727 return None 4728 4729 constraint = self._prev.text.upper() 4730 if constraint not in self.CONSTRAINT_PARSERS: 4731 self.raise_error(f"No parser found for schema constraint {constraint}.") 4732 4733 return self.CONSTRAINT_PARSERS[constraint](self) 4734 4735 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4736 self._match_text_seq("KEY") 4737 return self.expression( 4738 exp.UniqueColumnConstraint, 4739 this=self._parse_schema(self._parse_id_var(any_token=False)), 4740 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4741 on_conflict=self._parse_on_conflict(), 4742 ) 4743 4744 def _parse_key_constraint_options(self) -> t.List[str]: 4745 options = [] 4746 while True: 4747 if not self._curr: 4748 break 4749 4750 if self._match(TokenType.ON): 4751 action = None 4752 on = self._advance_any() and self._prev.text 4753 4754 if self._match_text_seq("NO", "ACTION"): 4755 action = "NO ACTION" 4756 elif self._match_text_seq("CASCADE"): 4757 
action = "CASCADE" 4758 elif self._match_text_seq("RESTRICT"): 4759 action = "RESTRICT" 4760 elif self._match_pair(TokenType.SET, TokenType.NULL): 4761 action = "SET NULL" 4762 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4763 action = "SET DEFAULT" 4764 else: 4765 self.raise_error("Invalid key constraint") 4766 4767 options.append(f"ON {on} {action}") 4768 elif self._match_text_seq("NOT", "ENFORCED"): 4769 options.append("NOT ENFORCED") 4770 elif self._match_text_seq("DEFERRABLE"): 4771 options.append("DEFERRABLE") 4772 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4773 options.append("INITIALLY DEFERRED") 4774 elif self._match_text_seq("NORELY"): 4775 options.append("NORELY") 4776 elif self._match_text_seq("MATCH", "FULL"): 4777 options.append("MATCH FULL") 4778 else: 4779 break 4780 4781 return options 4782 4783 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4784 if match and not self._match(TokenType.REFERENCES): 4785 return None 4786 4787 expressions = None 4788 this = self._parse_table(schema=True) 4789 options = self._parse_key_constraint_options() 4790 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4791 4792 def _parse_foreign_key(self) -> exp.ForeignKey: 4793 expressions = self._parse_wrapped_id_vars() 4794 reference = self._parse_references() 4795 options = {} 4796 4797 while self._match(TokenType.ON): 4798 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4799 self.raise_error("Expected DELETE or UPDATE") 4800 4801 kind = self._prev.text.lower() 4802 4803 if self._match_text_seq("NO", "ACTION"): 4804 action = "NO ACTION" 4805 elif self._match(TokenType.SET): 4806 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4807 action = "SET " + self._prev.text.upper() 4808 else: 4809 self._advance() 4810 action = self._prev.text.upper() 4811 4812 options[kind] = action 4813 4814 return self.expression( 4815 exp.ForeignKey, 4816 expressions=expressions, 4817 
reference=reference, 4818 **options, # type: ignore 4819 ) 4820 4821 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4822 return self._parse_field() 4823 4824 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4825 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4826 self._retreat(self._index - 1) 4827 return None 4828 4829 id_vars = self._parse_wrapped_id_vars() 4830 return self.expression( 4831 exp.PeriodForSystemTimeConstraint, 4832 this=seq_get(id_vars, 0), 4833 expression=seq_get(id_vars, 1), 4834 ) 4835 4836 def _parse_primary_key( 4837 self, wrapped_optional: bool = False, in_props: bool = False 4838 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4839 desc = ( 4840 self._match_set((TokenType.ASC, TokenType.DESC)) 4841 and self._prev.token_type == TokenType.DESC 4842 ) 4843 4844 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4845 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4846 4847 expressions = self._parse_wrapped_csv( 4848 self._parse_primary_key_part, optional=wrapped_optional 4849 ) 4850 options = self._parse_key_constraint_options() 4851 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4852 4853 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4854 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4855 4856 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4857 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4858 return this 4859 4860 bracket_kind = self._prev.token_type 4861 expressions = self._parse_csv( 4862 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4863 ) 4864 4865 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4866 self.raise_error("Expected ]") 4867 elif bracket_kind == TokenType.L_BRACE and not 
self._match(TokenType.R_BRACE): 4868 self.raise_error("Expected }") 4869 4870 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4871 if bracket_kind == TokenType.L_BRACE: 4872 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4873 elif not this or this.name.upper() == "ARRAY": 4874 this = self.expression(exp.Array, expressions=expressions) 4875 else: 4876 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4877 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4878 4879 self._add_comments(this) 4880 return self._parse_bracket(this) 4881 4882 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4883 if self._match(TokenType.COLON): 4884 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4885 return this 4886 4887 def _parse_case(self) -> t.Optional[exp.Expression]: 4888 ifs = [] 4889 default = None 4890 4891 comments = self._prev_comments 4892 expression = self._parse_conjunction() 4893 4894 while self._match(TokenType.WHEN): 4895 this = self._parse_conjunction() 4896 self._match(TokenType.THEN) 4897 then = self._parse_conjunction() 4898 ifs.append(self.expression(exp.If, this=this, true=then)) 4899 4900 if self._match(TokenType.ELSE): 4901 default = self._parse_conjunction() 4902 4903 if not self._match(TokenType.END): 4904 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4905 default = exp.column("interval") 4906 else: 4907 self.raise_error("Expected END after CASE", self._prev) 4908 4909 return self.expression( 4910 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4911 ) 4912 4913 def _parse_if(self) -> t.Optional[exp.Expression]: 4914 if self._match(TokenType.L_PAREN): 4915 args = self._parse_csv(self._parse_conjunction) 4916 this = self.validate_expression(exp.If.from_arg_list(args), args) 4917 self._match_r_paren() 4918 else: 4919 index = 
self._index - 1 4920 4921 if self.NO_PAREN_IF_COMMANDS and index == 0: 4922 return self._parse_as_command(self._prev) 4923 4924 condition = self._parse_conjunction() 4925 4926 if not condition: 4927 self._retreat(index) 4928 return None 4929 4930 self._match(TokenType.THEN) 4931 true = self._parse_conjunction() 4932 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4933 self._match(TokenType.END) 4934 this = self.expression(exp.If, this=condition, true=true, false=false) 4935 4936 return this 4937 4938 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4939 if not self._match_text_seq("VALUE", "FOR"): 4940 self._retreat(self._index - 1) 4941 return None 4942 4943 return self.expression( 4944 exp.NextValueFor, 4945 this=self._parse_column(), 4946 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4947 ) 4948 4949 def _parse_extract(self) -> exp.Extract: 4950 this = self._parse_function() or self._parse_var() or self._parse_type() 4951 4952 if self._match(TokenType.FROM): 4953 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4954 4955 if not self._match(TokenType.COMMA): 4956 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4957 4958 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4959 4960 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4961 this = self._parse_conjunction() 4962 4963 if not self._match(TokenType.ALIAS): 4964 if self._match(TokenType.COMMA): 4965 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4966 4967 self.raise_error("Expected AS after CAST") 4968 4969 fmt = None 4970 to = self._parse_types() 4971 4972 if self._match(TokenType.FORMAT): 4973 fmt_string = self._parse_string() 4974 fmt = self._parse_at_time_zone(fmt_string) 4975 4976 if not to: 4977 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4978 if to.this in 
exp.DataType.TEMPORAL_TYPES:
                # CAST with FORMAT to a temporal type is really a string-to-date/time
                # conversion, so build StrToDate/StrToTime with a normalized format.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...
]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse the interior of CONVERT(expr USING charset) or CONVERT(expr, type)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
5065 """ 5066 args = self._parse_csv(self._parse_conjunction) 5067 5068 if len(args) < 3: 5069 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5070 5071 expression, *expressions = args 5072 if not expression: 5073 return None 5074 5075 ifs = [] 5076 for search, result in zip(expressions[::2], expressions[1::2]): 5077 if not search or not result: 5078 return None 5079 5080 if isinstance(search, exp.Literal): 5081 ifs.append( 5082 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5083 ) 5084 elif isinstance(search, exp.Null): 5085 ifs.append( 5086 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5087 ) 5088 else: 5089 cond = exp.or_( 5090 exp.EQ(this=expression.copy(), expression=search), 5091 exp.and_( 5092 exp.Is(this=expression.copy(), expression=exp.Null()), 5093 exp.Is(this=search.copy(), expression=exp.Null()), 5094 copy=False, 5095 ), 5096 copy=False, 5097 ) 5098 ifs.append(exp.If(this=cond, true=result)) 5099 5100 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5101 5102 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5103 self._match_text_seq("KEY") 5104 key = self._parse_column() 5105 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5106 self._match_text_seq("VALUE") 5107 value = self._parse_bitwise() 5108 5109 if not key and not value: 5110 return None 5111 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5112 5113 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5114 if not this or not self._match_text_seq("FORMAT", "JSON"): 5115 return this 5116 5117 return self.expression(exp.FormatJson, this=this) 5118 5119 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5120 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT(...) / JSON_OBJECTAGG(...) entries and trailing modifiers."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        # NULL ON NULL / ABSENT ON NULL
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH UNIQUE [KEYS] / WITHOUT UNIQUE [KEYS]
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column entry of a JSON COLUMNS(...) schema, including NESTED."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS(...) schema clause (as used by JSON_TABLE)."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(expr [, path] [error/empty handling] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH(col, ...) AGAINST (expr [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH clause: name type [path] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments.

        `haystack_first` flips the comma-form argument order; the
        `needle IN haystack` form is handled explicitly.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT arguments: MODEL <table> [, TABLE <table> [, params]]."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # Join-hint pseudo-function: its arguments are table references.
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return 
self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        # Optional position prefix (the dialect's TRIM_TYPES, e.g. LEADING/TRAILING/BOTH).
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM str) puts the pattern first, TRIM(str, chars) puts it
            # last (unless the dialect says otherwise); normalize so `this` is always
            # the string being trimmed and `expression` the characters to remove.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW w AS (...) [, w2 AS (...) ...]
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        # One entry of a WINDOW clause: name AS (spec).
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Optional IGNORE NULLS / RESPECT NULLS suffix wrapping `this`.
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional HAVING MAX/MIN <column> modifier; `max` is False only for MIN.
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: 
t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes on `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...). With `alias=True`, parse a named
        WINDOW definition (name AS (spec)) instead of an OVER clause."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist a nested IGNORE/RESPECT NULLS wrapper to the top of the call.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments:
            func.comments = None  # type: ignore

        if not self._match(TokenType.L_PAREN):
            # OVER window_name — a reference to a named window, no parenthesized spec.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / expr plus an optional side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional [AS] alias (or parenthesized alias list) after `this`.

        `explicit` requires the AS keyword to be present.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()
        if 
identifier:
            return identifier

        # Fall back to consuming the next token (or one of `tokens`) as an identifier.
        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A string literal used where an identifier is expected (e.g. a quoted alias).
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any token / one of `tokens`) into an exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume and return the next token, unless it is reserved (and not ignored).
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def 
_parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        # Parameter reference; braces and the `:expression` suffix are both optional.
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The token didn't actually produce a placeholder: give it back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * EXCEPT (a, b) or EXCEPT a — returns the excluded columns.
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * REPLACE (expr AS col, ...) — returns the replacement expressions.
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list via `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comment that preceded the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators (token -> exp class in `expressions`) over operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; `optional` allows absent parens."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = 
False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT appearing inside DDL (e.g. CREATE TABLE ... AS SELECT ...).
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse a transaction start statement with optional mode lists."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Each mode is a run of VAR tokens; modes are comma-separated.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
expression.set("exists", exists_column) 5741 5742 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5743 if self._match_texts(("FIRST", "AFTER")): 5744 position = self._prev.text 5745 column_position = self.expression( 5746 exp.ColumnPosition, this=self._parse_column(), position=position 5747 ) 5748 expression.set("position", column_position) 5749 5750 return expression 5751 5752 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5753 drop = self._match(TokenType.DROP) and self._parse_drop() 5754 if drop and not isinstance(drop, exp.Command): 5755 drop.set("kind", drop.args.get("kind", "COLUMN")) 5756 return drop 5757 5758 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5759 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5760 return self.expression( 5761 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5762 ) 5763 5764 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5765 index = self._index - 1 5766 5767 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5768 return self._parse_csv( 5769 lambda: self.expression( 5770 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5771 ) 5772 ) 5773 5774 self._retreat(index) 5775 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5776 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5777 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5778 5779 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5780 self._match(TokenType.COLUMN) 5781 column = self._parse_field(any_token=True) 5782 5783 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5784 return self.expression(exp.AlterColumn, this=column, drop=True) 5785 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5786 return self.expression(exp.AlterColumn, this=column, 
default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        # Fall through to the [SET DATA] TYPE <dtype> [COLLATE ...] [USING ...] form.
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... DROP (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME [COLUMN old TO new | TO new_table]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; unsupported forms fall back to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        # Dispatch on the action keyword (ADD, DROP, RENAME, ...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = 
self._parse_csv(self._parse_property)

            # Only build an AlterTable node if every token was consumed; otherwise
            # fall through and keep the statement as an opaque command.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] target USING source ON condition WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse MERGE's WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN ... clauses."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False otherwise (BY TARGET or unspecified).
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, 
this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS, else fall back to a Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single SET assignment (e.g. ``x = 1`` or ``x TO 1``).

        Args:
            kind: Optional scope qualifier such as ``GLOBAL`` or ``SESSION``.

        Returns:
            A SetItem wrapping an EQ node, or None when no assignment is found
            (the token stream is rewound in that case).
        """
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, preferring dialect-specific SET_PARSERS."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to a Command if tokens remain."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        # Unconsumed tokens mean this wasn't a plain SET statement after all
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword described by *options*.

        Args:
            options: Maps an initial keyword to its allowed continuation
                keyword sequences.
            raise_unmatched: Whether to raise when no continuation matches.

        Returns:
            A Var holding the full matched option text, or None.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched, or the initial keyword is unknown
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap them in a raw Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # The leading token becomes the command name; the rest is its payload
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property such as LAYOUT(...) or SOURCE(KIND(k v ...))."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            # Collect key/value sub-properties until neither parses
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a range property such as RANGE(MIN x MAX y) or LIFETIME(n)."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            # Single-value form: treat it as MAX with an implicit MIN of 0
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse a comprehension tail: ``<expr> FOR <x> IN <iter> [IF <cond>]``."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Not a comprehension: rewind past the triggering FOR token as well
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a dollar-quoted (heredoc) string such as ``$tag$ ... $tag$``."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        # The opening delimiter must be contiguous: either $$ or $tag$
        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the exact opening tag sequence reappears
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Match the longest keyword sequence present in *trie* and return its parser.

        Rewinds the token stream and returns None when no complete key exists.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Return True (advancing by default) if the current token is token_type
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type contained in *types*
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive tokens of the given types
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require an opening parenthesis, raising a parse error otherwise
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a closing parenthesis, raising a parse error otherwise
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitively match the current token's text against *texts*
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Case-insensitively match a sequence of token texts; rewinds on failure
        index = self._index
        for text in
texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        # advance=False performs a pure lookahead: match, then rewind
        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns referencing *lambda_variables* into plain references.

        Columns whose first part names a lambda variable are replaced by a Dot
        chain (when qualified) or their bare identifier, so they refer to the
        lambda parameter instead of a table column.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace at the outermost enclosing Dot, if any
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE | DATABASE] ..., or the TRUNCATE() function."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        # Postgres-style identity handling clauses
        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by ``WITH <operator>``."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap from a lone star argument, otherwise a VarMap.

    The arguments are interpreted as alternating key/value pairs:
    ``[k0, v0, k1, v1, ...]``.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Even positions are keys, the following odd positions are their values
    keys = [args[i] for i in range(0, len(args), 2)]
    values = [args[i + 1] for i in range(0, len(args), 2)]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG/LN expression, honoring the dialect's argument order."""
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        # Default argument order is base, expression; swap for dialects that
        # put the expression first
        if not dialect.LOG_BASE_FIRST:
            base, value = value, base
        return exp.Log(this=base, expression=value)

    # One-argument form: LN or LOG depending on the dialect's default
    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that constructs *expr_type* from (value, path, ...) args."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=seq_get(args, 0), expression=path)

        # Only JSONExtract carries additional (variadic) arguments
        if len(args) > 2 and expr_type is exp.JSONExtract:
            node.set("expressions", args[2:])

        return node

    return _builder
88class Parser(metaclass=_Parser): 89 """ 90 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 91 92 Args: 93 error_level: The desired error level. 94 Default: ErrorLevel.IMMEDIATE 95 error_message_context: The amount of context to capture from a query string when displaying 96 the error message (in number of characters). 97 Default: 100 98 max_errors: Maximum number of error messages to include in a raised ParseError. 99 This is only relevant if error_level is ErrorLevel.RAISE. 100 Default: 3 101 """ 102 103 FUNCTIONS: t.Dict[str, t.Callable] = { 104 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 105 "CONCAT": lambda args, dialect: exp.Concat( 106 expressions=args, 107 safe=not dialect.STRICT_STRING_CONCAT, 108 coalesce=dialect.CONCAT_COALESCE, 109 ), 110 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 111 expressions=args, 112 safe=not dialect.STRICT_STRING_CONCAT, 113 coalesce=dialect.CONCAT_COALESCE, 114 ), 115 "DATE_TO_DATE_STR": lambda args: exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 120 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 121 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 122 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 123 "LIKE": build_like, 124 "LOG": build_logarithm, 125 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 126 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 127 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 128 "TIME_TO_TIME_STR": lambda args: exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 133 this=exp.Cast( 134 this=seq_get(args, 
0), 135 to=exp.DataType(this=exp.DataType.Type.TEXT), 136 ), 137 start=exp.Literal.number(1), 138 length=exp.Literal.number(10), 139 ), 140 "VAR_MAP": build_var_map, 141 } 142 143 NO_PAREN_FUNCTIONS = { 144 TokenType.CURRENT_DATE: exp.CurrentDate, 145 TokenType.CURRENT_DATETIME: exp.CurrentDate, 146 TokenType.CURRENT_TIME: exp.CurrentTime, 147 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 148 TokenType.CURRENT_USER: exp.CurrentUser, 149 } 150 151 STRUCT_TYPE_TOKENS = { 152 TokenType.NESTED, 153 TokenType.OBJECT, 154 TokenType.STRUCT, 155 } 156 157 NESTED_TYPE_TOKENS = { 158 TokenType.ARRAY, 159 TokenType.LOWCARDINALITY, 160 TokenType.MAP, 161 TokenType.NULLABLE, 162 *STRUCT_TYPE_TOKENS, 163 } 164 165 ENUM_TYPE_TOKENS = { 166 TokenType.ENUM, 167 TokenType.ENUM8, 168 TokenType.ENUM16, 169 } 170 171 AGGREGATE_TYPE_TOKENS = { 172 TokenType.AGGREGATEFUNCTION, 173 TokenType.SIMPLEAGGREGATEFUNCTION, 174 } 175 176 TYPE_TOKENS = { 177 TokenType.BIT, 178 TokenType.BOOLEAN, 179 TokenType.TINYINT, 180 TokenType.UTINYINT, 181 TokenType.SMALLINT, 182 TokenType.USMALLINT, 183 TokenType.INT, 184 TokenType.UINT, 185 TokenType.BIGINT, 186 TokenType.UBIGINT, 187 TokenType.INT128, 188 TokenType.UINT128, 189 TokenType.INT256, 190 TokenType.UINT256, 191 TokenType.MEDIUMINT, 192 TokenType.UMEDIUMINT, 193 TokenType.FIXEDSTRING, 194 TokenType.FLOAT, 195 TokenType.DOUBLE, 196 TokenType.CHAR, 197 TokenType.NCHAR, 198 TokenType.VARCHAR, 199 TokenType.NVARCHAR, 200 TokenType.BPCHAR, 201 TokenType.TEXT, 202 TokenType.MEDIUMTEXT, 203 TokenType.LONGTEXT, 204 TokenType.MEDIUMBLOB, 205 TokenType.LONGBLOB, 206 TokenType.BINARY, 207 TokenType.VARBINARY, 208 TokenType.JSON, 209 TokenType.JSONB, 210 TokenType.INTERVAL, 211 TokenType.TINYBLOB, 212 TokenType.TINYTEXT, 213 TokenType.TIME, 214 TokenType.TIMETZ, 215 TokenType.TIMESTAMP, 216 TokenType.TIMESTAMP_S, 217 TokenType.TIMESTAMP_MS, 218 TokenType.TIMESTAMP_NS, 219 TokenType.TIMESTAMPTZ, 220 TokenType.TIMESTAMPLTZ, 221 TokenType.DATETIME, 222 
TokenType.DATETIME64, 223 TokenType.DATE, 224 TokenType.DATE32, 225 TokenType.INT4RANGE, 226 TokenType.INT4MULTIRANGE, 227 TokenType.INT8RANGE, 228 TokenType.INT8MULTIRANGE, 229 TokenType.NUMRANGE, 230 TokenType.NUMMULTIRANGE, 231 TokenType.TSRANGE, 232 TokenType.TSMULTIRANGE, 233 TokenType.TSTZRANGE, 234 TokenType.TSTZMULTIRANGE, 235 TokenType.DATERANGE, 236 TokenType.DATEMULTIRANGE, 237 TokenType.DECIMAL, 238 TokenType.UDECIMAL, 239 TokenType.BIGDECIMAL, 240 TokenType.UUID, 241 TokenType.GEOGRAPHY, 242 TokenType.GEOMETRY, 243 TokenType.HLLSKETCH, 244 TokenType.HSTORE, 245 TokenType.PSEUDO_TYPE, 246 TokenType.SUPER, 247 TokenType.SERIAL, 248 TokenType.SMALLSERIAL, 249 TokenType.BIGSERIAL, 250 TokenType.XML, 251 TokenType.YEAR, 252 TokenType.UNIQUEIDENTIFIER, 253 TokenType.USERDEFINED, 254 TokenType.MONEY, 255 TokenType.SMALLMONEY, 256 TokenType.ROWVERSION, 257 TokenType.IMAGE, 258 TokenType.VARIANT, 259 TokenType.OBJECT, 260 TokenType.OBJECT_IDENTIFIER, 261 TokenType.INET, 262 TokenType.IPADDRESS, 263 TokenType.IPPREFIX, 264 TokenType.IPV4, 265 TokenType.IPV6, 266 TokenType.UNKNOWN, 267 TokenType.NULL, 268 TokenType.NAME, 269 *ENUM_TYPE_TOKENS, 270 *NESTED_TYPE_TOKENS, 271 *AGGREGATE_TYPE_TOKENS, 272 } 273 274 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 275 TokenType.BIGINT: TokenType.UBIGINT, 276 TokenType.INT: TokenType.UINT, 277 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 278 TokenType.SMALLINT: TokenType.USMALLINT, 279 TokenType.TINYINT: TokenType.UTINYINT, 280 TokenType.DECIMAL: TokenType.UDECIMAL, 281 } 282 283 SUBQUERY_PREDICATES = { 284 TokenType.ANY: exp.Any, 285 TokenType.ALL: exp.All, 286 TokenType.EXISTS: exp.Exists, 287 TokenType.SOME: exp.Any, 288 } 289 290 RESERVED_TOKENS = { 291 *Tokenizer.SINGLE_TOKENS.values(), 292 TokenType.SELECT, 293 } 294 295 DB_CREATABLES = { 296 TokenType.DATABASE, 297 TokenType.SCHEMA, 298 TokenType.TABLE, 299 TokenType.VIEW, 300 TokenType.MODEL, 301 TokenType.DICTIONARY, 302 TokenType.SEQUENCE, 303 TokenType.STORAGE_INTEGRATION, 304 
} 305 306 CREATABLES = { 307 TokenType.COLUMN, 308 TokenType.CONSTRAINT, 309 TokenType.FUNCTION, 310 TokenType.INDEX, 311 TokenType.PROCEDURE, 312 TokenType.FOREIGN_KEY, 313 *DB_CREATABLES, 314 } 315 316 # Tokens that can represent identifiers 317 ID_VAR_TOKENS = { 318 TokenType.VAR, 319 TokenType.ANTI, 320 TokenType.APPLY, 321 TokenType.ASC, 322 TokenType.ASOF, 323 TokenType.AUTO_INCREMENT, 324 TokenType.BEGIN, 325 TokenType.BPCHAR, 326 TokenType.CACHE, 327 TokenType.CASE, 328 TokenType.COLLATE, 329 TokenType.COMMAND, 330 TokenType.COMMENT, 331 TokenType.COMMIT, 332 TokenType.CONSTRAINT, 333 TokenType.DEFAULT, 334 TokenType.DELETE, 335 TokenType.DESC, 336 TokenType.DESCRIBE, 337 TokenType.DICTIONARY, 338 TokenType.DIV, 339 TokenType.END, 340 TokenType.EXECUTE, 341 TokenType.ESCAPE, 342 TokenType.FALSE, 343 TokenType.FIRST, 344 TokenType.FILTER, 345 TokenType.FINAL, 346 TokenType.FORMAT, 347 TokenType.FULL, 348 TokenType.IDENTIFIER, 349 TokenType.IS, 350 TokenType.ISNULL, 351 TokenType.INTERVAL, 352 TokenType.KEEP, 353 TokenType.KILL, 354 TokenType.LEFT, 355 TokenType.LOAD, 356 TokenType.MERGE, 357 TokenType.NATURAL, 358 TokenType.NEXT, 359 TokenType.OFFSET, 360 TokenType.OPERATOR, 361 TokenType.ORDINALITY, 362 TokenType.OVERLAPS, 363 TokenType.OVERWRITE, 364 TokenType.PARTITION, 365 TokenType.PERCENT, 366 TokenType.PIVOT, 367 TokenType.PRAGMA, 368 TokenType.RANGE, 369 TokenType.RECURSIVE, 370 TokenType.REFERENCES, 371 TokenType.REFRESH, 372 TokenType.REPLACE, 373 TokenType.RIGHT, 374 TokenType.ROW, 375 TokenType.ROWS, 376 TokenType.SEMI, 377 TokenType.SET, 378 TokenType.SETTINGS, 379 TokenType.SHOW, 380 TokenType.TEMPORARY, 381 TokenType.TOP, 382 TokenType.TRUE, 383 TokenType.TRUNCATE, 384 TokenType.UNIQUE, 385 TokenType.UNPIVOT, 386 TokenType.UPDATE, 387 TokenType.USE, 388 TokenType.VOLATILE, 389 TokenType.WINDOW, 390 *CREATABLES, 391 *SUBQUERY_PREDICATES, 392 *TYPE_TOKENS, 393 *NO_PAREN_FUNCTIONS, 394 } 395 396 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 397 
398 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 399 TokenType.ANTI, 400 TokenType.APPLY, 401 TokenType.ASOF, 402 TokenType.FULL, 403 TokenType.LEFT, 404 TokenType.LOCK, 405 TokenType.NATURAL, 406 TokenType.OFFSET, 407 TokenType.RIGHT, 408 TokenType.SEMI, 409 TokenType.WINDOW, 410 } 411 412 ALIAS_TOKENS = ID_VAR_TOKENS 413 414 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 415 416 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 417 418 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 419 420 FUNC_TOKENS = { 421 TokenType.COLLATE, 422 TokenType.COMMAND, 423 TokenType.CURRENT_DATE, 424 TokenType.CURRENT_DATETIME, 425 TokenType.CURRENT_TIMESTAMP, 426 TokenType.CURRENT_TIME, 427 TokenType.CURRENT_USER, 428 TokenType.FILTER, 429 TokenType.FIRST, 430 TokenType.FORMAT, 431 TokenType.GLOB, 432 TokenType.IDENTIFIER, 433 TokenType.INDEX, 434 TokenType.ISNULL, 435 TokenType.ILIKE, 436 TokenType.INSERT, 437 TokenType.LIKE, 438 TokenType.MERGE, 439 TokenType.OFFSET, 440 TokenType.PRIMARY_KEY, 441 TokenType.RANGE, 442 TokenType.REPLACE, 443 TokenType.RLIKE, 444 TokenType.ROW, 445 TokenType.UNNEST, 446 TokenType.VAR, 447 TokenType.LEFT, 448 TokenType.RIGHT, 449 TokenType.SEQUENCE, 450 TokenType.DATE, 451 TokenType.DATETIME, 452 TokenType.TABLE, 453 TokenType.TIMESTAMP, 454 TokenType.TIMESTAMPTZ, 455 TokenType.TRUNCATE, 456 TokenType.WINDOW, 457 TokenType.XOR, 458 *TYPE_TOKENS, 459 *SUBQUERY_PREDICATES, 460 } 461 462 CONJUNCTION = { 463 TokenType.AND: exp.And, 464 TokenType.OR: exp.Or, 465 } 466 467 EQUALITY = { 468 TokenType.COLON_EQ: exp.PropertyEQ, 469 TokenType.EQ: exp.EQ, 470 TokenType.NEQ: exp.NEQ, 471 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 472 } 473 474 COMPARISON = { 475 TokenType.GT: exp.GT, 476 TokenType.GTE: exp.GTE, 477 TokenType.LT: exp.LT, 478 TokenType.LTE: exp.LTE, 479 } 480 481 BITWISE = { 482 TokenType.AMP: exp.BitwiseAnd, 483 TokenType.CARET: exp.BitwiseXor, 484 TokenType.PIPE: exp.BitwiseOr, 485 } 486 487 TERM = { 488 TokenType.DASH: exp.Sub, 
489 TokenType.PLUS: exp.Add, 490 TokenType.MOD: exp.Mod, 491 TokenType.COLLATE: exp.Collate, 492 } 493 494 FACTOR = { 495 TokenType.DIV: exp.IntDiv, 496 TokenType.LR_ARROW: exp.Distance, 497 TokenType.SLASH: exp.Div, 498 TokenType.STAR: exp.Mul, 499 } 500 501 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 502 503 TIMES = { 504 TokenType.TIME, 505 TokenType.TIMETZ, 506 } 507 508 TIMESTAMPS = { 509 TokenType.TIMESTAMP, 510 TokenType.TIMESTAMPTZ, 511 TokenType.TIMESTAMPLTZ, 512 *TIMES, 513 } 514 515 SET_OPERATIONS = { 516 TokenType.UNION, 517 TokenType.INTERSECT, 518 TokenType.EXCEPT, 519 } 520 521 JOIN_METHODS = { 522 TokenType.ASOF, 523 TokenType.NATURAL, 524 TokenType.POSITIONAL, 525 } 526 527 JOIN_SIDES = { 528 TokenType.LEFT, 529 TokenType.RIGHT, 530 TokenType.FULL, 531 } 532 533 JOIN_KINDS = { 534 TokenType.INNER, 535 TokenType.OUTER, 536 TokenType.CROSS, 537 TokenType.SEMI, 538 TokenType.ANTI, 539 } 540 541 JOIN_HINTS: t.Set[str] = set() 542 543 LAMBDAS = { 544 TokenType.ARROW: lambda self, expressions: self.expression( 545 exp.Lambda, 546 this=self._replace_lambda( 547 self._parse_conjunction(), 548 {node.name for node in expressions}, 549 ), 550 expressions=expressions, 551 ), 552 TokenType.FARROW: lambda self, expressions: self.expression( 553 exp.Kwarg, 554 this=exp.var(expressions[0].name), 555 expression=self._parse_conjunction(), 556 ), 557 } 558 559 COLUMN_OPERATORS = { 560 TokenType.DOT: None, 561 TokenType.DCOLON: lambda self, this, to: self.expression( 562 exp.Cast if self.STRICT_CAST else exp.TryCast, 563 this=this, 564 to=to, 565 ), 566 TokenType.ARROW: lambda self, this, path: self.expression( 567 exp.JSONExtract, 568 this=this, 569 expression=self.dialect.to_json_path(path), 570 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 571 ), 572 TokenType.DARROW: lambda self, this, path: self.expression( 573 exp.JSONExtractScalar, 574 this=this, 575 expression=self.dialect.to_json_path(path), 576 
only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 577 ), 578 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 579 exp.JSONBExtract, 580 this=this, 581 expression=path, 582 ), 583 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 584 exp.JSONBExtractScalar, 585 this=this, 586 expression=path, 587 ), 588 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 589 exp.JSONBContains, 590 this=this, 591 expression=key, 592 ), 593 } 594 595 EXPRESSION_PARSERS = { 596 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 597 exp.Column: lambda self: self._parse_column(), 598 exp.Condition: lambda self: self._parse_conjunction(), 599 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 600 exp.Expression: lambda self: self._parse_expression(), 601 exp.From: lambda self: self._parse_from(), 602 exp.Group: lambda self: self._parse_group(), 603 exp.Having: lambda self: self._parse_having(), 604 exp.Identifier: lambda self: self._parse_id_var(), 605 exp.Join: lambda self: self._parse_join(), 606 exp.Lambda: lambda self: self._parse_lambda(), 607 exp.Lateral: lambda self: self._parse_lateral(), 608 exp.Limit: lambda self: self._parse_limit(), 609 exp.Offset: lambda self: self._parse_offset(), 610 exp.Order: lambda self: self._parse_order(), 611 exp.Ordered: lambda self: self._parse_ordered(), 612 exp.Properties: lambda self: self._parse_properties(), 613 exp.Qualify: lambda self: self._parse_qualify(), 614 exp.Returning: lambda self: self._parse_returning(), 615 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 616 exp.Table: lambda self: self._parse_table_parts(), 617 exp.TableAlias: lambda self: self._parse_table_alias(), 618 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 619 exp.Where: lambda self: self._parse_where(), 620 exp.Window: lambda self: self._parse_named_window(), 621 exp.With: lambda self: self._parse_with(), 622 "JOIN_TYPE": lambda self: 
self._parse_join_parts(), 623 } 624 625 STATEMENT_PARSERS = { 626 TokenType.ALTER: lambda self: self._parse_alter(), 627 TokenType.BEGIN: lambda self: self._parse_transaction(), 628 TokenType.CACHE: lambda self: self._parse_cache(), 629 TokenType.COMMENT: lambda self: self._parse_comment(), 630 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 631 TokenType.CREATE: lambda self: self._parse_create(), 632 TokenType.DELETE: lambda self: self._parse_delete(), 633 TokenType.DESC: lambda self: self._parse_describe(), 634 TokenType.DESCRIBE: lambda self: self._parse_describe(), 635 TokenType.DROP: lambda self: self._parse_drop(), 636 TokenType.INSERT: lambda self: self._parse_insert(), 637 TokenType.KILL: lambda self: self._parse_kill(), 638 TokenType.LOAD: lambda self: self._parse_load(), 639 TokenType.MERGE: lambda self: self._parse_merge(), 640 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 641 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 642 TokenType.REFRESH: lambda self: self._parse_refresh(), 643 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 644 TokenType.SET: lambda self: self._parse_set(), 645 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 646 TokenType.UNCACHE: lambda self: self._parse_uncache(), 647 TokenType.UPDATE: lambda self: self._parse_update(), 648 TokenType.USE: lambda self: self.expression( 649 exp.Use, 650 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 651 this=self._parse_table(schema=False), 652 ), 653 } 654 655 UNARY_PARSERS = { 656 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 657 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 658 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 659 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 660 TokenType.PIPE_SLASH: lambda self: 
self.expression(exp.Sqrt, this=self._parse_unary()), 661 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 662 } 663 664 STRING_PARSERS = { 665 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 666 exp.RawString, this=token.text 667 ), 668 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 669 exp.National, this=token.text 670 ), 671 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 672 TokenType.STRING: lambda self, token: self.expression( 673 exp.Literal, this=token.text, is_string=True 674 ), 675 TokenType.UNICODE_STRING: lambda self, token: self.expression( 676 exp.UnicodeString, 677 this=token.text, 678 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 679 ), 680 } 681 682 NUMERIC_PARSERS = { 683 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 684 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 685 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 686 TokenType.NUMBER: lambda self, token: self.expression( 687 exp.Literal, this=token.text, is_string=False 688 ), 689 } 690 691 PRIMARY_PARSERS = { 692 **STRING_PARSERS, 693 **NUMERIC_PARSERS, 694 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 695 TokenType.NULL: lambda self, _: self.expression(exp.Null), 696 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 697 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 698 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 699 TokenType.STAR: lambda self, _: self.expression( 700 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 701 ), 702 } 703 704 PLACEHOLDER_PARSERS = { 705 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 706 TokenType.PARAMETER: lambda self: 
self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Parsers for "range"-style predicates (BETWEEN, IN, LIKE, IS, ...), keyed by the
    # token that introduces them. Each callable receives the already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # Parsers for DDL properties, keyed by the upper-cased keyword (sequence) that
    # introduces each property; dispatched from _parse_property / _parse_property_before.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Parsers for column/table constraints, keyed by the keyword that introduces them.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Parsers for ALTER TABLE actions, keyed by the action keyword.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraint keywords that may appear in a schema without an explicit CONSTRAINT name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    # Parsers for functions invoked without parentheses (e.g. CASE ... END), keyed by name.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may carry aliases.
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Expression types that represent a key/value style definition.
    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Parsers for functions with special argument syntax, keyed by function name.
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Parsers for query modifiers (WHERE, GROUP BY, LIMIT, ...), keyed by their leading
    # token. Each callable returns a (modifier key, parsed expression) pair.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # Parsers for SET statement items, keyed by the scope/kind keyword.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Parsers that convert a literal of a given data type into a typed expression.
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Tokens that may start the SELECT part of a DDL statement (CREATE ... AS <select>).
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            # NOTE(review): "UNCOMITTED" misspells the standard READ UNCOMMITTED level --
            # confirm whether matching the misspelling is intentional
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: The number of characters of query context captured
                around an error location when producing its message.
            max_errors: The maximum number of error messages included in a raised
                ParseError (only relevant when error_level is ErrorLevel.RAISE).
            dialect: The dialect (name or instance) to parse with, resolved through
                Dialect.get_or_raise.
        """
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all parser state so the same instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream on top-level semicolons and runs `parse_method` once
        # per statement chunk, producing one (possibly None) syntax tree per statement.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not open a new, empty statement chunk
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers any pending comments from the previous token onto `expression`
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL text spanned by the two tokens, inclusive
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent in the source text
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor `times` tokens forward, refreshing _curr/_next/_prev bookkeeping
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back (or forward) to an absolute token index
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Wraps the statement as an opaque Command: the previous token's text is the
        # command name and the remainder (a string, when present) its expression.
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses a COMMENT statement (e.g. COMMENT ON <kind> <name> IS <string>); falls
        # back to an opaque Command when the commented-on object kind isn't a creatable.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL action is an expression optionally followed by DELETE / RECOMPRESS /
            # TO DISK / TO VOLUME
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatches on the current token and falls
        # back to parsing an expression/SELECT with query modifiers.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses a DROP statement; falls back to an opaque Command when the object kind
        # isn't a known creatable.
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS, truthy only when the full sequence was matched
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Parses a CREATE statement; bails out to an opaque Command whenever the
        # construct can't be recognized.
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at the various clause positions
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Any unconsumed tokens mean we didn't fully understand the statement
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Consumes CREATE SEQUENCE options (INCREMENT BY, MINVALUE, CACHE, ...); returns
        # None when nothing was consumed (cursor unchanged).
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier kwargs that were actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        # Parses a parenthesized, comma-separated property list
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Dispatches to PROPERTY_PARSERS by keyword; otherwise falls back to a generic
        # `key = value` property or to sequence options.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        # Parses STORED [AS] <format>, optionally with INPUTFORMAT/OUTPUTFORMAT strings
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Parses an optional "=" or AS followed by a field, wrapped in `exp_class`
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return self.expression(exp_class, this=field, **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Collects consecutive properties into one exp.Properties node; None when empty
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE directly following CREATE/REPLACE/UNIQUE (see PRE_VOLATILE_TOKENS) is
        # treated as a table property; otherwise it denotes stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this",
self._parse_table_parts()) 1817 1818 if self._match(TokenType.COMMA): 1819 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1820 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1821 1822 self._match_r_paren() 1823 1824 return prop 1825 1826 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1827 if self._match(TokenType.L_PAREN, advance=False): 1828 return self._parse_wrapped_properties() 1829 1830 if self._match_text_seq("JOURNAL"): 1831 return self._parse_withjournaltable() 1832 1833 if self._match_texts(self.VIEW_ATTRIBUTES): 1834 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1835 1836 if self._match_text_seq("DATA"): 1837 return self._parse_withdata(no=False) 1838 elif self._match_text_seq("NO", "DATA"): 1839 return self._parse_withdata(no=True) 1840 1841 if not self._next: 1842 return None 1843 1844 return self._parse_withisolatedloading() 1845 1846 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1847 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1848 self._match(TokenType.EQ) 1849 1850 user = self._parse_id_var() 1851 self._match(TokenType.PARAMETER) 1852 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1853 1854 if not user or not host: 1855 return None 1856 1857 return exp.DefinerProperty(this=f"{user}@{host}") 1858 1859 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1860 self._match(TokenType.TABLE) 1861 self._match(TokenType.EQ) 1862 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1863 1864 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1865 return self.expression(exp.LogProperty, no=no) 1866 1867 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1868 return self.expression(exp.JournalProperty, **kwargs) 1869 1870 def _parse_checksum(self) -> exp.ChecksumProperty: 1871 self._match(TokenType.EQ) 1872 1873 on = None 1874 if 
self._match(TokenType.ON): 1875 on = True 1876 elif self._match_text_seq("OFF"): 1877 on = False 1878 1879 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1880 1881 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1882 return self.expression( 1883 exp.Cluster, 1884 expressions=( 1885 self._parse_wrapped_csv(self._parse_ordered) 1886 if wrapped 1887 else self._parse_csv(self._parse_ordered) 1888 ), 1889 ) 1890 1891 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1892 self._match_text_seq("BY") 1893 1894 self._match_l_paren() 1895 expressions = self._parse_csv(self._parse_column) 1896 self._match_r_paren() 1897 1898 if self._match_text_seq("SORTED", "BY"): 1899 self._match_l_paren() 1900 sorted_by = self._parse_csv(self._parse_ordered) 1901 self._match_r_paren() 1902 else: 1903 sorted_by = None 1904 1905 self._match(TokenType.INTO) 1906 buckets = self._parse_number() 1907 self._match_text_seq("BUCKETS") 1908 1909 return self.expression( 1910 exp.ClusteredByProperty, 1911 expressions=expressions, 1912 sorted_by=sorted_by, 1913 buckets=buckets, 1914 ) 1915 1916 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1917 if not self._match_text_seq("GRANTS"): 1918 self._retreat(self._index - 1) 1919 return None 1920 1921 return self.expression(exp.CopyGrantsProperty) 1922 1923 def _parse_freespace(self) -> exp.FreespaceProperty: 1924 self._match(TokenType.EQ) 1925 return self.expression( 1926 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1927 ) 1928 1929 def _parse_mergeblockratio( 1930 self, no: bool = False, default: bool = False 1931 ) -> exp.MergeBlockRatioProperty: 1932 if self._match(TokenType.EQ): 1933 return self.expression( 1934 exp.MergeBlockRatioProperty, 1935 this=self._parse_number(), 1936 percent=self._match(TokenType.PERCENT), 1937 ) 1938 1939 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1940 1941 
def _parse_datablocksize( 1942 self, 1943 default: t.Optional[bool] = None, 1944 minimum: t.Optional[bool] = None, 1945 maximum: t.Optional[bool] = None, 1946 ) -> exp.DataBlocksizeProperty: 1947 self._match(TokenType.EQ) 1948 size = self._parse_number() 1949 1950 units = None 1951 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1952 units = self._prev.text 1953 1954 return self.expression( 1955 exp.DataBlocksizeProperty, 1956 size=size, 1957 units=units, 1958 default=default, 1959 minimum=minimum, 1960 maximum=maximum, 1961 ) 1962 1963 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1964 self._match(TokenType.EQ) 1965 always = self._match_text_seq("ALWAYS") 1966 manual = self._match_text_seq("MANUAL") 1967 never = self._match_text_seq("NEVER") 1968 default = self._match_text_seq("DEFAULT") 1969 1970 autotemp = None 1971 if self._match_text_seq("AUTOTEMP"): 1972 autotemp = self._parse_schema() 1973 1974 return self.expression( 1975 exp.BlockCompressionProperty, 1976 always=always, 1977 manual=manual, 1978 never=never, 1979 default=default, 1980 autotemp=autotemp, 1981 ) 1982 1983 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 1984 index = self._index 1985 no = self._match_text_seq("NO") 1986 concurrent = self._match_text_seq("CONCURRENT") 1987 1988 if not self._match_text_seq("ISOLATED", "LOADING"): 1989 self._retreat(index) 1990 return None 1991 1992 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 1993 return self.expression( 1994 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 1995 ) 1996 1997 def _parse_locking(self) -> exp.LockingProperty: 1998 if self._match(TokenType.TABLE): 1999 kind = "TABLE" 2000 elif self._match(TokenType.VIEW): 2001 kind = "VIEW" 2002 elif self._match(TokenType.ROW): 2003 kind = "ROW" 2004 elif self._match_text_seq("DATABASE"): 2005 kind = "DATABASE" 2006 else: 2007 kind = None 2008 2009 if kind in 
("DATABASE", "TABLE", "VIEW"): 2010 this = self._parse_table_parts() 2011 else: 2012 this = None 2013 2014 if self._match(TokenType.FOR): 2015 for_or_in = "FOR" 2016 elif self._match(TokenType.IN): 2017 for_or_in = "IN" 2018 else: 2019 for_or_in = None 2020 2021 if self._match_text_seq("ACCESS"): 2022 lock_type = "ACCESS" 2023 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2024 lock_type = "EXCLUSIVE" 2025 elif self._match_text_seq("SHARE"): 2026 lock_type = "SHARE" 2027 elif self._match_text_seq("READ"): 2028 lock_type = "READ" 2029 elif self._match_text_seq("WRITE"): 2030 lock_type = "WRITE" 2031 elif self._match_text_seq("CHECKSUM"): 2032 lock_type = "CHECKSUM" 2033 else: 2034 lock_type = None 2035 2036 override = self._match_text_seq("OVERRIDE") 2037 2038 return self.expression( 2039 exp.LockingProperty, 2040 this=this, 2041 kind=kind, 2042 for_or_in=for_or_in, 2043 lock_type=lock_type, 2044 override=override, 2045 ) 2046 2047 def _parse_partition_by(self) -> t.List[exp.Expression]: 2048 if self._match(TokenType.PARTITION_BY): 2049 return self._parse_csv(self._parse_conjunction) 2050 return [] 2051 2052 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2053 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2054 if self._match_text_seq("MINVALUE"): 2055 return exp.var("MINVALUE") 2056 if self._match_text_seq("MAXVALUE"): 2057 return exp.var("MAXVALUE") 2058 return self._parse_bitwise() 2059 2060 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2061 expression = None 2062 from_expressions = None 2063 to_expressions = None 2064 2065 if self._match(TokenType.IN): 2066 this = self._parse_wrapped_csv(self._parse_bitwise) 2067 elif self._match(TokenType.FROM): 2068 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2069 self._match_text_seq("TO") 2070 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2071 elif self._match_text_seq("WITH", "(", "MODULUS"): 2072 this = 
self._parse_number() 2073 self._match_text_seq(",", "REMAINDER") 2074 expression = self._parse_number() 2075 self._match_r_paren() 2076 else: 2077 self.raise_error("Failed to parse partition bound spec.") 2078 2079 return self.expression( 2080 exp.PartitionBoundSpec, 2081 this=this, 2082 expression=expression, 2083 from_expressions=from_expressions, 2084 to_expressions=to_expressions, 2085 ) 2086 2087 # https://www.postgresql.org/docs/current/sql-createtable.html 2088 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2089 if not self._match_text_seq("OF"): 2090 self._retreat(self._index - 1) 2091 return None 2092 2093 this = self._parse_table(schema=True) 2094 2095 if self._match(TokenType.DEFAULT): 2096 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2097 elif self._match_text_seq("FOR", "VALUES"): 2098 expression = self._parse_partition_bound_spec() 2099 else: 2100 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2101 2102 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2103 2104 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2105 self._match(TokenType.EQ) 2106 return self.expression( 2107 exp.PartitionedByProperty, 2108 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2109 ) 2110 2111 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2112 if self._match_text_seq("AND", "STATISTICS"): 2113 statistics = True 2114 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2115 statistics = False 2116 else: 2117 statistics = None 2118 2119 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2120 2121 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2122 if self._match_text_seq("SQL"): 2123 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2124 return None 2125 2126 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2127 if 
self._match_text_seq("SQL", "DATA"): 2128 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2129 return None 2130 2131 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2132 if self._match_text_seq("PRIMARY", "INDEX"): 2133 return exp.NoPrimaryIndexProperty() 2134 if self._match_text_seq("SQL"): 2135 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2136 return None 2137 2138 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2139 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2140 return exp.OnCommitProperty() 2141 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2142 return exp.OnCommitProperty(delete=True) 2143 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2144 2145 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2146 if self._match_text_seq("SQL", "DATA"): 2147 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2148 return None 2149 2150 def _parse_distkey(self) -> exp.DistKeyProperty: 2151 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2152 2153 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2154 table = self._parse_table(schema=True) 2155 2156 options = [] 2157 while self._match_texts(("INCLUDING", "EXCLUDING")): 2158 this = self._prev.text.upper() 2159 2160 id_var = self._parse_id_var() 2161 if not id_var: 2162 return None 2163 2164 options.append( 2165 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2166 ) 2167 2168 return self.expression(exp.LikeProperty, this=table, expressions=options) 2169 2170 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2171 return self.expression( 2172 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2173 ) 2174 2175 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2176 self._match(TokenType.EQ) 2177 return 
self.expression( 2178 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2179 ) 2180 2181 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2182 self._match_text_seq("WITH", "CONNECTION") 2183 return self.expression( 2184 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2185 ) 2186 2187 def _parse_returns(self) -> exp.ReturnsProperty: 2188 value: t.Optional[exp.Expression] 2189 is_table = self._match(TokenType.TABLE) 2190 2191 if is_table: 2192 if self._match(TokenType.LT): 2193 value = self.expression( 2194 exp.Schema, 2195 this="TABLE", 2196 expressions=self._parse_csv(self._parse_struct_types), 2197 ) 2198 if not self._match(TokenType.GT): 2199 self.raise_error("Expecting >") 2200 else: 2201 value = self._parse_schema(exp.var("TABLE")) 2202 else: 2203 value = self._parse_types() 2204 2205 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2206 2207 def _parse_describe(self) -> exp.Describe: 2208 kind = self._match_set(self.CREATABLES) and self._prev.text 2209 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2210 if not self._match_set(self.ID_VAR_TOKENS, advance=False): 2211 style = None 2212 self._retreat(self._index - 1) 2213 this = self._parse_table(schema=True) 2214 properties = self._parse_properties() 2215 expressions = properties.expressions if properties else None 2216 return self.expression( 2217 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2218 ) 2219 2220 def _parse_insert(self) -> exp.Insert: 2221 comments = ensure_list(self._prev_comments) 2222 hint = self._parse_hint() 2223 overwrite = self._match(TokenType.OVERWRITE) 2224 ignore = self._match(TokenType.IGNORE) 2225 local = self._match_text_seq("LOCAL") 2226 alternative = None 2227 is_function = None 2228 2229 if self._match_text_seq("DIRECTORY"): 2230 this: t.Optional[exp.Expression] = self.expression( 2231 exp.Directory, 2232 
this=self._parse_var_or_string(), 2233 local=local, 2234 row_format=self._parse_row_format(match_row=True), 2235 ) 2236 else: 2237 if self._match(TokenType.OR): 2238 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2239 2240 self._match(TokenType.INTO) 2241 comments += ensure_list(self._prev_comments) 2242 self._match(TokenType.TABLE) 2243 is_function = self._match(TokenType.FUNCTION) 2244 2245 this = ( 2246 self._parse_table(schema=True, parse_partition=True) 2247 if not is_function 2248 else self._parse_function() 2249 ) 2250 2251 returning = self._parse_returning() 2252 2253 return self.expression( 2254 exp.Insert, 2255 comments=comments, 2256 hint=hint, 2257 is_function=is_function, 2258 this=this, 2259 stored=self._match_text_seq("STORED") and self._parse_stored(), 2260 by_name=self._match_text_seq("BY", "NAME"), 2261 exists=self._parse_exists(), 2262 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2263 and self._parse_conjunction(), 2264 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2265 conflict=self._parse_on_conflict(), 2266 returning=returning or self._parse_returning(), 2267 overwrite=overwrite, 2268 alternative=alternative, 2269 ignore=ignore, 2270 ) 2271 2272 def _parse_kill(self) -> exp.Kill: 2273 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2274 2275 return self.expression( 2276 exp.Kill, 2277 this=self._parse_primary(), 2278 kind=kind, 2279 ) 2280 2281 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2282 conflict = self._match_text_seq("ON", "CONFLICT") 2283 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2284 2285 if not conflict and not duplicate: 2286 return None 2287 2288 conflict_keys = None 2289 constraint = None 2290 2291 if conflict: 2292 if self._match_text_seq("ON", "CONSTRAINT"): 2293 constraint = self._parse_id_var() 2294 elif self._match(TokenType.L_PAREN): 2295 conflict_keys = 
self._parse_csv(self._parse_id_var) 2296 self._match_r_paren() 2297 2298 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2299 if self._prev.token_type == TokenType.UPDATE: 2300 self._match(TokenType.SET) 2301 expressions = self._parse_csv(self._parse_equality) 2302 else: 2303 expressions = None 2304 2305 return self.expression( 2306 exp.OnConflict, 2307 duplicate=duplicate, 2308 expressions=expressions, 2309 action=action, 2310 conflict_keys=conflict_keys, 2311 constraint=constraint, 2312 ) 2313 2314 def _parse_returning(self) -> t.Optional[exp.Returning]: 2315 if not self._match(TokenType.RETURNING): 2316 return None 2317 return self.expression( 2318 exp.Returning, 2319 expressions=self._parse_csv(self._parse_expression), 2320 into=self._match(TokenType.INTO) and self._parse_table_part(), 2321 ) 2322 2323 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2324 if not self._match(TokenType.FORMAT): 2325 return None 2326 return self._parse_row_format() 2327 2328 def _parse_row_format( 2329 self, match_row: bool = False 2330 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2331 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2332 return None 2333 2334 if self._match_text_seq("SERDE"): 2335 this = self._parse_string() 2336 2337 serde_properties = None 2338 if self._match(TokenType.SERDE_PROPERTIES): 2339 serde_properties = self.expression( 2340 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2341 ) 2342 2343 return self.expression( 2344 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2345 ) 2346 2347 self._match_text_seq("DELIMITED") 2348 2349 kwargs = {} 2350 2351 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2352 kwargs["fields"] = self._parse_string() 2353 if self._match_text_seq("ESCAPED", "BY"): 2354 kwargs["escaped"] = self._parse_string() 2355 if self._match_text_seq("COLLECTION", "ITEMS", 
"TERMINATED", "BY"): 2356 kwargs["collection_items"] = self._parse_string() 2357 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2358 kwargs["map_keys"] = self._parse_string() 2359 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2360 kwargs["lines"] = self._parse_string() 2361 if self._match_text_seq("NULL", "DEFINED", "AS"): 2362 kwargs["null"] = self._parse_string() 2363 2364 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2365 2366 def _parse_load(self) -> exp.LoadData | exp.Command: 2367 if self._match_text_seq("DATA"): 2368 local = self._match_text_seq("LOCAL") 2369 self._match_text_seq("INPATH") 2370 inpath = self._parse_string() 2371 overwrite = self._match(TokenType.OVERWRITE) 2372 self._match_pair(TokenType.INTO, TokenType.TABLE) 2373 2374 return self.expression( 2375 exp.LoadData, 2376 this=self._parse_table(schema=True), 2377 local=local, 2378 overwrite=overwrite, 2379 inpath=inpath, 2380 partition=self._parse_partition(), 2381 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2382 serde=self._match_text_seq("SERDE") and self._parse_string(), 2383 ) 2384 return self._parse_as_command(self._prev) 2385 2386 def _parse_delete(self) -> exp.Delete: 2387 # This handles MySQL's "Multiple-Table Syntax" 2388 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2389 tables = None 2390 comments = self._prev_comments 2391 if not self._match(TokenType.FROM, advance=False): 2392 tables = self._parse_csv(self._parse_table) or None 2393 2394 returning = self._parse_returning() 2395 2396 return self.expression( 2397 exp.Delete, 2398 comments=comments, 2399 tables=tables, 2400 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2401 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2402 where=self._parse_where(), 2403 returning=returning or self._parse_returning(), 2404 limit=self._parse_limit(), 2405 ) 2406 2407 def _parse_update(self) -> exp.Update: 2408 
comments = self._prev_comments 2409 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2410 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2411 returning = self._parse_returning() 2412 return self.expression( 2413 exp.Update, 2414 comments=comments, 2415 **{ # type: ignore 2416 "this": this, 2417 "expressions": expressions, 2418 "from": self._parse_from(joins=True), 2419 "where": self._parse_where(), 2420 "returning": returning or self._parse_returning(), 2421 "order": self._parse_order(), 2422 "limit": self._parse_limit(), 2423 }, 2424 ) 2425 2426 def _parse_uncache(self) -> exp.Uncache: 2427 if not self._match(TokenType.TABLE): 2428 self.raise_error("Expecting TABLE after UNCACHE") 2429 2430 return self.expression( 2431 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2432 ) 2433 2434 def _parse_cache(self) -> exp.Cache: 2435 lazy = self._match_text_seq("LAZY") 2436 self._match(TokenType.TABLE) 2437 table = self._parse_table(schema=True) 2438 2439 options = [] 2440 if self._match_text_seq("OPTIONS"): 2441 self._match_l_paren() 2442 k = self._parse_string() 2443 self._match(TokenType.EQ) 2444 v = self._parse_string() 2445 options = [k, v] 2446 self._match_r_paren() 2447 2448 self._match(TokenType.ALIAS) 2449 return self.expression( 2450 exp.Cache, 2451 this=table, 2452 lazy=lazy, 2453 options=options, 2454 expression=self._parse_select(nested=True), 2455 ) 2456 2457 def _parse_partition(self) -> t.Optional[exp.Partition]: 2458 if not self._match(TokenType.PARTITION): 2459 return None 2460 2461 return self.expression( 2462 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2463 ) 2464 2465 def _parse_value(self) -> exp.Tuple: 2466 if self._match(TokenType.L_PAREN): 2467 expressions = self._parse_csv(self._parse_expression) 2468 self._match_r_paren() 2469 return self.expression(exp.Tuple, expressions=expressions) 2470 2471 # In some dialects we can have 
VALUES 1, 2 which results in 1 column & 2 rows. 2472 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2473 2474 def _parse_projections(self) -> t.List[exp.Expression]: 2475 return self._parse_expressions() 2476 2477 def _parse_select( 2478 self, 2479 nested: bool = False, 2480 table: bool = False, 2481 parse_subquery_alias: bool = True, 2482 parse_set_operation: bool = True, 2483 ) -> t.Optional[exp.Expression]: 2484 cte = self._parse_with() 2485 2486 if cte: 2487 this = self._parse_statement() 2488 2489 if not this: 2490 self.raise_error("Failed to parse any statement following CTE") 2491 return cte 2492 2493 if "with" in this.arg_types: 2494 this.set("with", cte) 2495 else: 2496 self.raise_error(f"{this.key} does not support CTE") 2497 this = cte 2498 2499 return this 2500 2501 # duckdb supports leading with FROM x 2502 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2503 2504 if self._match(TokenType.SELECT): 2505 comments = self._prev_comments 2506 2507 hint = self._parse_hint() 2508 all_ = self._match(TokenType.ALL) 2509 distinct = self._match_set(self.DISTINCT_TOKENS) 2510 2511 kind = ( 2512 self._match(TokenType.ALIAS) 2513 and self._match_texts(("STRUCT", "VALUE")) 2514 and self._prev.text.upper() 2515 ) 2516 2517 if distinct: 2518 distinct = self.expression( 2519 exp.Distinct, 2520 on=self._parse_value() if self._match(TokenType.ON) else None, 2521 ) 2522 2523 if all_ and distinct: 2524 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2525 2526 limit = self._parse_limit(top=True) 2527 projections = self._parse_projections() 2528 2529 this = self.expression( 2530 exp.Select, 2531 kind=kind, 2532 hint=hint, 2533 distinct=distinct, 2534 expressions=projections, 2535 limit=limit, 2536 ) 2537 this.comments = comments 2538 2539 into = self._parse_into() 2540 if into: 2541 this.set("into", into) 2542 2543 if not from_: 2544 from_ = self._parse_from() 2545 2546 if from_: 2547 
this.set("from", from_) 2548 2549 this = self._parse_query_modifiers(this) 2550 elif (table or nested) and self._match(TokenType.L_PAREN): 2551 if self._match(TokenType.PIVOT): 2552 this = self._parse_simplified_pivot() 2553 elif self._match(TokenType.FROM): 2554 this = exp.select("*").from_( 2555 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2556 ) 2557 else: 2558 this = ( 2559 self._parse_table() 2560 if table 2561 else self._parse_select(nested=True, parse_set_operation=False) 2562 ) 2563 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2564 2565 self._match_r_paren() 2566 2567 # We return early here so that the UNION isn't attached to the subquery by the 2568 # following call to _parse_set_operations, but instead becomes the parent node 2569 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2570 elif self._match(TokenType.VALUES, advance=False): 2571 this = self._parse_derived_table_values() 2572 elif from_: 2573 this = exp.select("*").from_(from_.this, copy=False) 2574 else: 2575 this = None 2576 2577 if parse_set_operation: 2578 return self._parse_set_operations(this) 2579 return this 2580 2581 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2582 if not skip_with_token and not self._match(TokenType.WITH): 2583 return None 2584 2585 comments = self._prev_comments 2586 recursive = self._match(TokenType.RECURSIVE) 2587 2588 expressions = [] 2589 while True: 2590 expressions.append(self._parse_cte()) 2591 2592 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2593 break 2594 else: 2595 self._match(TokenType.WITH) 2596 2597 return self.expression( 2598 exp.With, comments=comments, expressions=expressions, recursive=recursive 2599 ) 2600 2601 def _parse_cte(self) -> exp.CTE: 2602 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2603 if not alias or not alias.this: 2604 self.raise_error("Expected CTE to have alias") 2605 2606 self._match(TokenType.ALIAS) 2607 
2608 if self._match_text_seq("NOT", "MATERIALIZED"): 2609 materialized = False 2610 elif self._match_text_seq("MATERIALIZED"): 2611 materialized = True 2612 else: 2613 materialized = None 2614 2615 return self.expression( 2616 exp.CTE, 2617 this=self._parse_wrapped(self._parse_statement), 2618 alias=alias, 2619 materialized=materialized, 2620 ) 2621 2622 def _parse_table_alias( 2623 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2624 ) -> t.Optional[exp.TableAlias]: 2625 any_token = self._match(TokenType.ALIAS) 2626 alias = ( 2627 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2628 or self._parse_string_as_identifier() 2629 ) 2630 2631 index = self._index 2632 if self._match(TokenType.L_PAREN): 2633 columns = self._parse_csv(self._parse_function_parameter) 2634 self._match_r_paren() if columns else self._retreat(index) 2635 else: 2636 columns = None 2637 2638 if not alias and not columns: 2639 return None 2640 2641 return self.expression(exp.TableAlias, this=alias, columns=columns) 2642 2643 def _parse_subquery( 2644 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2645 ) -> t.Optional[exp.Subquery]: 2646 if not this: 2647 return None 2648 2649 return self.expression( 2650 exp.Subquery, 2651 this=this, 2652 pivots=self._parse_pivots(), 2653 alias=self._parse_table_alias() if parse_alias else None, 2654 ) 2655 2656 def _implicit_unnests_to_explicit(self, this: E) -> E: 2657 from sqlglot.optimizer.normalize_identifiers import ( 2658 normalize_identifiers as _norm, 2659 ) 2660 2661 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2662 for i, join in enumerate(this.args.get("joins") or []): 2663 table = join.this 2664 normalized_table = table.copy() 2665 normalized_table.meta["maybe_column"] = True 2666 normalized_table = _norm(normalized_table, dialect=self.dialect) 2667 2668 if isinstance(table, exp.Table) and not join.args.get("on"): 2669 if 
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/ORDER/
        LIMIT etc.) to a query or table expression.

        Non-query expressions are returned unchanged. Modifier parsing loops
        until no registered modifier parser matches the current token.
        """
        if isinstance(this, (exp.Query, exp.Table)):
            # Joins and laterals come first; iter(..., None) keeps pulling
            # laterals until _parse_lateral returns None.
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT may carry an embedded OFFSET (and LIMIT BY
                            # expressions); hoist them onto the query itself as
                            # a proper exp.Offset node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        # Dialects like BigQuery allow implicit unnests in the FROM/JOIN list;
        # rewrite them into explicit UNNEST calls.
        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this
TokenType.SLASH): 2731 self.raise_error("Expected */ after HINT") 2732 2733 return self.expression(exp.Hint, expressions=hints) 2734 2735 return None 2736 2737 def _parse_into(self) -> t.Optional[exp.Into]: 2738 if not self._match(TokenType.INTO): 2739 return None 2740 2741 temp = self._match(TokenType.TEMPORARY) 2742 unlogged = self._match_text_seq("UNLOGGED") 2743 self._match(TokenType.TABLE) 2744 2745 return self.expression( 2746 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2747 ) 2748 2749 def _parse_from( 2750 self, joins: bool = False, skip_from_token: bool = False 2751 ) -> t.Optional[exp.From]: 2752 if not skip_from_token and not self._match(TokenType.FROM): 2753 return None 2754 2755 return self.expression( 2756 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2757 ) 2758 2759 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2760 return self.expression( 2761 exp.MatchRecognizeMeasure, 2762 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2763 this=self._parse_expression(), 2764 ) 2765 2766 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2767 if not self._match(TokenType.MATCH_RECOGNIZE): 2768 return None 2769 2770 self._match_l_paren() 2771 2772 partition = self._parse_partition_by() 2773 order = self._parse_order() 2774 2775 measures = ( 2776 self._parse_csv(self._parse_match_recognize_measure) 2777 if self._match_text_seq("MEASURES") 2778 else None 2779 ) 2780 2781 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2782 rows = exp.var("ONE ROW PER MATCH") 2783 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2784 text = "ALL ROWS PER MATCH" 2785 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2786 text += " SHOW EMPTY MATCHES" 2787 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2788 text += " OMIT EMPTY MATCHES" 2789 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2790 text += " WITH 
UNMATCHED ROWS" 2791 rows = exp.var(text) 2792 else: 2793 rows = None 2794 2795 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2796 text = "AFTER MATCH SKIP" 2797 if self._match_text_seq("PAST", "LAST", "ROW"): 2798 text += " PAST LAST ROW" 2799 elif self._match_text_seq("TO", "NEXT", "ROW"): 2800 text += " TO NEXT ROW" 2801 elif self._match_text_seq("TO", "FIRST"): 2802 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2803 elif self._match_text_seq("TO", "LAST"): 2804 text += f" TO LAST {self._advance_any().text}" # type: ignore 2805 after = exp.var(text) 2806 else: 2807 after = None 2808 2809 if self._match_text_seq("PATTERN"): 2810 self._match_l_paren() 2811 2812 if not self._curr: 2813 self.raise_error("Expecting )", self._curr) 2814 2815 paren = 1 2816 start = self._curr 2817 2818 while self._curr and paren > 0: 2819 if self._curr.token_type == TokenType.L_PAREN: 2820 paren += 1 2821 if self._curr.token_type == TokenType.R_PAREN: 2822 paren -= 1 2823 2824 end = self._prev 2825 self._advance() 2826 2827 if paren > 0: 2828 self.raise_error("Expecting )", self._curr) 2829 2830 pattern = exp.var(self._find_sql(start, end)) 2831 else: 2832 pattern = None 2833 2834 define = ( 2835 self._parse_csv(self._parse_name_as_expression) 2836 if self._match_text_seq("DEFINE") 2837 else None 2838 ) 2839 2840 self._match_r_paren() 2841 2842 return self.expression( 2843 exp.MatchRecognize, 2844 partition_by=partition, 2845 order=order, 2846 measures=measures, 2847 rows=rows, 2848 after=after, 2849 pattern=pattern, 2850 define=define, 2851 alias=self._parse_table_alias(), 2852 ) 2853 2854 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2855 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2856 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2857 cross_apply = False 2858 2859 if cross_apply is not None: 2860 this = self._parse_select(table=True) 2861 view = None 2862 outer = None 2863 elif 
self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # No subquery: LATERAL over an UNNEST, a function call or a plain
            # (possibly dotted) identifier
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a JOIN, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single join clause (comma join, JOIN, or APPLY); None if absent."""
        if self._match(TokenType.COMMA):
            # Implicit join: FROM a, b
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not a join after all - rewind and drop whatever parts were consumed
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # Handle nested joins, where the ON/USING for this join may appear
            # only after further joins, e.g. "a JOIN (b JOIN c ON ...) ON ..."
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the trailing parameter clauses of an index definition."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        If `index` is given (the name was parsed by the caller), only the
        ON <table> part is expected; otherwise UNIQUE/PRIMARY/AMP modifiers
        and the INDEX keyword itself are parsed first.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table-level hints (WITH (...) style or index hints)."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            #
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly dotted) table reference into an exp.Table node.

        Raises a ParseError when no table name (or database name, for
        `is_db_reference`) could be parsed.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            # Shift parts left: the last parsed part names a database, not a table
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any FROM-clause table factor: a lateral, UNNEST, VALUES list,
        subquery, or a plain table reference with its alias, hints, pivots,
        version and sample clauses.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place TABLESAMPLE before the alias, others after it
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node it applies to
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal/versioned table clauses (TIMESTAMP/VERSION snapshots)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) with optional alias and WITH ORDINALITY/OFFSET."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # With ORDINALITY, the extra trailing column alias names the offset
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES list, optionally wrapped in parens as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE clause (or USING SAMPLE when `as_modifier`)."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
bucket_denominator = None 3302 bucket_field = None 3303 percent = None 3304 size = None 3305 seed = None 3306 3307 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3308 matched_l_paren = self._match(TokenType.L_PAREN) 3309 3310 if self.TABLESAMPLE_CSV: 3311 num = None 3312 expressions = self._parse_csv(self._parse_primary) 3313 else: 3314 expressions = None 3315 num = ( 3316 self._parse_factor() 3317 if self._match(TokenType.NUMBER, advance=False) 3318 else self._parse_primary() or self._parse_placeholder() 3319 ) 3320 3321 if self._match_text_seq("BUCKET"): 3322 bucket_numerator = self._parse_number() 3323 self._match_text_seq("OUT", "OF") 3324 bucket_denominator = bucket_denominator = self._parse_number() 3325 self._match(TokenType.ON) 3326 bucket_field = self._parse_field() 3327 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3328 percent = num 3329 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3330 size = num 3331 else: 3332 percent = num 3333 3334 if matched_l_paren: 3335 self._match_r_paren() 3336 3337 if self._match(TokenType.L_PAREN): 3338 method = self._parse_var(upper=True) 3339 seed = self._match(TokenType.COMMA) and self._parse_number() 3340 self._match_r_paren() 3341 elif self._match_texts(("SEED", "REPEATABLE")): 3342 seed = self._parse_wrapped(self._parse_number) 3343 3344 return self.expression( 3345 exp.TableSample, 3346 expressions=expressions, 3347 method=method, 3348 bucket_numerator=bucket_numerator, 3349 bucket_denominator=bucket_denominator, 3350 bucket_field=bucket_field, 3351 percent=percent, 3352 size=size, 3353 seed=seed, 3354 ) 3355 3356 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3357 return list(iter(self._parse_pivot, None)) or None 3358 3359 def _parse_joins(self) -> t.Iterator[exp.Join]: 3360 return iter(self._parse_join, None) 3361 3362 # https://duckdb.org/docs/sql/statements/pivot 3363 def _parse_simplified_pivot(self) -> exp.Pivot: 3364 def _parse_on() -> 
t.Optional[exp.Expression]: 3365 this = self._parse_bitwise() 3366 return self._parse_in(this) if self._match(TokenType.IN) else this 3367 3368 this = self._parse_table() 3369 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3370 using = self._match(TokenType.USING) and self._parse_csv( 3371 lambda: self._parse_alias(self._parse_function()) 3372 ) 3373 group = self._parse_group() 3374 return self.expression( 3375 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3376 ) 3377 3378 def _parse_pivot_in(self) -> exp.In: 3379 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3380 this = self._parse_conjunction() 3381 3382 self._match(TokenType.ALIAS) 3383 alias = self._parse_field() 3384 if alias: 3385 return self.expression(exp.PivotAlias, this=this, alias=alias) 3386 3387 return this 3388 3389 value = self._parse_column() 3390 3391 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3392 self.raise_error("Expecting IN (") 3393 3394 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3395 3396 self._match_r_paren() 3397 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3398 3399 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3400 index = self._index 3401 include_nulls = None 3402 3403 if self._match(TokenType.PIVOT): 3404 unpivot = False 3405 elif self._match(TokenType.UNPIVOT): 3406 unpivot = True 3407 3408 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3409 if self._match_text_seq("INCLUDE", "NULLS"): 3410 include_nulls = True 3411 elif self._match_text_seq("EXCLUDE", "NULLS"): 3412 include_nulls = False 3413 else: 3414 return None 3415 3416 expressions = [] 3417 3418 if not self._match(TokenType.L_PAREN): 3419 self._retreat(index) 3420 return None 3421 3422 if unpivot: 3423 expressions = self._parse_csv(self._parse_column) 3424 else: 3425 expressions = self._parse_csv(lambda: 
self._parse_alias(self._parse_function())) 3426 3427 if not expressions: 3428 self.raise_error("Failed to parse PIVOT's aggregation list") 3429 3430 if not self._match(TokenType.FOR): 3431 self.raise_error("Expecting FOR") 3432 3433 field = self._parse_pivot_in() 3434 3435 self._match_r_paren() 3436 3437 pivot = self.expression( 3438 exp.Pivot, 3439 expressions=expressions, 3440 field=field, 3441 unpivot=unpivot, 3442 include_nulls=include_nulls, 3443 ) 3444 3445 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3446 pivot.set("alias", self._parse_table_alias()) 3447 3448 if not unpivot: 3449 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3450 3451 columns: t.List[exp.Expression] = [] 3452 for fld in pivot.args["field"].expressions: 3453 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3454 for name in names: 3455 if self.PREFIXED_PIVOT_COLUMNS: 3456 name = f"{name}_{field_name}" if name else field_name 3457 else: 3458 name = f"{field_name}_{name}" if name else field_name 3459 3460 columns.append(exp.to_identifier(name)) 3461 3462 pivot.set("columns", columns) 3463 3464 return pivot 3465 3466 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3467 return [agg.alias for agg in aggregations] 3468 3469 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3470 if not skip_where_token and not self._match(TokenType.PREWHERE): 3471 return None 3472 3473 return self.expression( 3474 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3475 ) 3476 3477 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3478 if not skip_where_token and not self._match(TokenType.WHERE): 3479 return None 3480 3481 return self.expression( 3482 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3483 ) 3484 3485 def _parse_group(self, skip_group_by_token: bool = False) -> 
t.Optional[exp.Group]:
        """Parse a GROUP BY clause, collecting expressions, GROUPING SETS,
        ROLLUP, CUBE and TOTALS into a single exp.Group node.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP is followed by one
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # WITH was not followed by ROLLUP/CUBE/TOTALS - undo the match
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...) into a list of grouping set expressions."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized column tuple or a single column
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse START WITH ... CONNECT BY (the two parts may come in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only meaningful inside CONNECT BY, so register a temporary
        # parser for it and remove it again immediately afterwards
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse "<name> AS <expression>" (alias first, e.g. in INTERPOLATE/DEFINE)."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an ORDER BY ... INTERPOLATE (...) list."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged if absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause introduced by `token` into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY item with ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering is implicit, derive it from the dialect's NULL_ORDERING
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or FETCH FIRST/NEXT; returns `this` if none present."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren =
self._match(TokenType.L_PAREN) 3662 expression = self._parse_term() if limit_paren else self._parse_number() 3663 3664 if limit_paren: 3665 self._match_r_paren() 3666 else: 3667 expression = self._parse_term() 3668 3669 if self._match(TokenType.COMMA): 3670 offset = expression 3671 expression = self._parse_term() 3672 else: 3673 offset = None 3674 3675 limit_exp = self.expression( 3676 exp.Limit, 3677 this=this, 3678 expression=expression, 3679 offset=offset, 3680 comments=comments, 3681 expressions=self._parse_limit_by(), 3682 ) 3683 3684 return limit_exp 3685 3686 if self._match(TokenType.FETCH): 3687 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3688 direction = self._prev.text.upper() if direction else "FIRST" 3689 3690 count = self._parse_field(tokens=self.FETCH_TOKENS) 3691 percent = self._match(TokenType.PERCENT) 3692 3693 self._match_set((TokenType.ROW, TokenType.ROWS)) 3694 3695 only = self._match_text_seq("ONLY") 3696 with_ties = self._match_text_seq("WITH", "TIES") 3697 3698 if only and with_ties: 3699 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3700 3701 return self.expression( 3702 exp.Fetch, 3703 direction=direction, 3704 count=count, 3705 percent=percent, 3706 with_ties=with_ties, 3707 ) 3708 3709 return this 3710 3711 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3712 if not self._match(TokenType.OFFSET): 3713 return this 3714 3715 count = self._parse_term() 3716 self._match_set((TokenType.ROW, TokenType.ROWS)) 3717 3718 return self.expression( 3719 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3720 ) 3721 3722 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3723 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3724 3725 def _parse_locks(self) -> t.List[exp.Lock]: 3726 locks = [] 3727 while True: 3728 if self._match_text_seq("FOR", "UPDATE"): 3729 update = True 3730 elif 
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3731 "LOCK", "IN", "SHARE", "MODE" 3732 ): 3733 update = False 3734 else: 3735 break 3736 3737 expressions = None 3738 if self._match_text_seq("OF"): 3739 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3740 3741 wait: t.Optional[bool | exp.Expression] = None 3742 if self._match_text_seq("NOWAIT"): 3743 wait = True 3744 elif self._match_text_seq("WAIT"): 3745 wait = self._parse_primary() 3746 elif self._match_text_seq("SKIP", "LOCKED"): 3747 wait = False 3748 3749 locks.append( 3750 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3751 ) 3752 3753 return locks 3754 3755 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3756 while this and self._match_set(self.SET_OPERATIONS): 3757 token_type = self._prev.token_type 3758 3759 if token_type == TokenType.UNION: 3760 operation = exp.Union 3761 elif token_type == TokenType.EXCEPT: 3762 operation = exp.Except 3763 else: 3764 operation = exp.Intersect 3765 3766 comments = self._prev.comments 3767 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3768 by_name = self._match_text_seq("BY", "NAME") 3769 expression = self._parse_select(nested=True, parse_set_operation=False) 3770 3771 this = self.expression( 3772 operation, 3773 comments=comments, 3774 this=this, 3775 distinct=distinct, 3776 by_name=by_name, 3777 expression=expression, 3778 ) 3779 3780 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3781 expression = this.expression 3782 3783 if expression: 3784 for arg in self.UNION_MODIFIERS: 3785 expr = expression.args.get(arg) 3786 if expr: 3787 this.set(arg, expr.pop()) 3788 3789 return this 3790 3791 def _parse_expression(self) -> t.Optional[exp.Expression]: 3792 return self._parse_alias(self._parse_conjunction()) 3793 3794 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3795 return 
self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ISNULL, IS, ...) onto `this`."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the remainder of an IS predicate: [NOT] DISTINCT FROM / NULL / boolean."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM is null-safe equality; IS DISTINCT FROM is its negation.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all; rewind past the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) ->
exp.In:
        """Parse the right-hand side of an IN predicate: UNNEST, a list/subquery, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            # IN UNNEST(...) form
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                # IN (SELECT ...) — store it as a subquery, not a value list
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            # Bare field form, e.g. Hive's `x IN y`
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `BETWEEN <low> AND <high>` into an exp.Between node."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional ESCAPE '<char>' suffix for LIKE-style patterns
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression; see canonicalization note in the body."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # "interval" turned out to be an identifier (e.g. a column), not a literal
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not
self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1' DAY TO SECOND
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, string concat (||), ?? coalescing and shift operators."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` is null-coalescing
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def
_parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative operators; tag Div nodes with the dialect's division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                # Record whether / is integer-typed or null-safe in this dialect
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ...
            # [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # Typed literal, e.g. DATE '2020-01-01'
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no args: treat it as a column instead
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter inside a type's parentheses, e.g. the 10 in DECIMAL(10)."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested / parameterized) data type, or return None on failure."""
        index = self._index

        # Teradata UDTs may be prefixed with SYSUDTLIB.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                # Re-tokenize the identifier: it may actually spell a type name
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Collect dotted qualifiers into the UDT name
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. ClickHouse AggregateFunction(<func>, <types...>)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Parenthesized args didn't parse as a type: rewind entirely
                self._retreat(index)
                return None

            # TYPE(...) could still be a function call — decided below
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values:
t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string literal follows, so this was a function call, not a type
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] suffixes wrap the type in ARRAY, e.g. INT[][]
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one member of a STRUCT type, e.g. `name: INT` or a bare type."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # No type followed the identifier: re-parse the whole thing as a type
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by parens acts as a plain identifier here
            this = self._parse_id_var()

        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this =
self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # <expr>::<type> cast syntax
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Another dotted part: shift the qualifiers over (table -> db -> catalog)
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate implicitly
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. .5
            return exp.Literal.number(f"0.{self._prev.text}")

        if
self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1:
                # (a, b, ...) is a tuple literal
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a primary / function / identifier; try-order depends on `anonymous_func`."""
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation; returns None if the current tokens are not one."""
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                # e.g. CURRENT_DATE without parentheses
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if not any_token and token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function
and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    # Some builders take the dialect to resolve dialect-specific defaults
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling of the function name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases/equalities) into PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap a Column key down to its identifier
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name and its parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def
_parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # <kind>.<name> form
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda, e.g. (x, y) -> expr, or fall back to a regular expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse a normal expression instead
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g.
        # in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and any trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse ALIAS / MATERIALIZED
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Nothing but a name: return the identifier as-is
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint |
exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            # AUTO not followed by REFRESH: give back the consumed token
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED [ALWAYS | BY DEFAULT] AS {IDENTITY | ROW ... | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return
self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>) — a computed value, not an identity
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numbers are positional: (start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # The NOT token itself was consumed by the caller
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint options (ON DELETE/UPDATE actions, DEFERRABLE, ...) as text."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (DELETE/UPDATE)
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        # REFERENCES <table>[(<cols>)] [<key options>]. When match=False the caller
        # has already consumed the REFERENCES token.
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        # FOREIGN KEY (<cols>) [REFERENCES ...] [ON DELETE|UPDATE <action>]...
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word action (e.g. CASCADE, RESTRICT)
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        # PERIOD FOR SYSTEM_TIME (<start_col>, <end_col>) -- T-SQL temporal tables
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a following column list this is a column-level constraint
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # Parses [...] subscripts / array literals and {...} struct literals.
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize the index per the dialect's INDEX_OFFSET
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Brackets can be chained, e.g. x[0][0]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE <expr> WHEN ... form
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # Disambiguate `... ELSE interval END` where "end" was consumed as an
            # interval unit instead of the CASE terminator.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            # Function form: IF(cond, true[, false])
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            # Statement form: IF cond THEN ... [ELSE ...] END
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        # EXTRACT(<unit> FROM <expr>) or the comma-separated variant
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST(x AS DATE/TIMESTAMP FORMAT 'fmt') becomes a string-to-date/time
                # conversion with the format translated to the dialect's time mapping.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name -> treat as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        # CONVERT(expr USING charset) or CONVERT(expr, type)
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result); a trailing unpaired arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal searches must also match when both sides are NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] <key> <sep> [VALUE] <value>, as in JSON_OBJECT arguments
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<json column defs>)
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        # JSON_TABLE(<doc> [, <path>] [<error/empty handling>] COLUMNS (...))
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MySQL full-text search: MATCH(<cols>) AGAINST(<string> [<modifier>])
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        # POSITION(needle IN haystack) or the comma-separated 2/3-arg variant;
        # haystack_first flips the argument order for dialects that need it.
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # ML.PREDICT(MODEL <model>, TABLE <table> [, <params struct>])
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM, the pattern precedes the target string, so swap
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Snowflake-style HAVING MAX/MIN inside an aggregate
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist IGNORE/RESPECT NULLS from inside the aggregate to wrap it
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        # Named-window definition (WINDOW x AS (...)) has no OVER keyword
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments:
            # Comments are transferred onto the Window node below
            func.comments = None  # type: ignore

        if not self._match(TokenType.L_PAREN):
            # OVER <window_name> -- reference to a previously defined named window
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: ROWS|RANGE [BETWEEN] <start> [AND <end>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One frame boundary: UNBOUNDED / CURRENT ROW / <expr>, plus PRECEDING/FOLLOWING
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # When explicit=True, only accept an alias introduced by AS
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multiple aliases, e.g. t AS (a, b)
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()
        if identifier:
            return identifier

        # Unquoted word usable as an identifier (keywords in ID_VAR_TOKENS included)
        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume the current token unless it's reserved; returns it or None
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        # Parameter syntax like {name} or {name: value}
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Parser declined; undo the token consumption
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * EXCEPT (<cols>) / EXCEPT <col>
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * REPLACE (<expr> AS <col>, ...)
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        # Parse a separator-delimited list; None results are skipped
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold: a <op> b <op> c, op drawn from `expressions`
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Parse parenthesized content; parens may be absent when optional=True
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Transaction modes are comma-separated runs of VAR tokens,
        # e.g. ISOLATION LEVEL READ COMMITTED, READ WRITE
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK token itself was consumed by the caller
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # COMMIT AND [NO] CHAIN
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def>
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
expression.set("exists", exists_column) 5742 5743 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5744 if self._match_texts(("FIRST", "AFTER")): 5745 position = self._prev.text 5746 column_position = self.expression( 5747 exp.ColumnPosition, this=self._parse_column(), position=position 5748 ) 5749 expression.set("position", column_position) 5750 5751 return expression 5752 5753 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5754 drop = self._match(TokenType.DROP) and self._parse_drop() 5755 if drop and not isinstance(drop, exp.Command): 5756 drop.set("kind", drop.args.get("kind", "COLUMN")) 5757 return drop 5758 5759 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5760 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5761 return self.expression( 5762 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5763 ) 5764 5765 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5766 index = self._index - 1 5767 5768 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5769 return self._parse_csv( 5770 lambda: self.expression( 5771 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5772 ) 5773 ) 5774 5775 self._retreat(index) 5776 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5777 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5778 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5779 5780 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5781 self._match(TokenType.COLUMN) 5782 column = self._parse_field(any_token=True) 5783 5784 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5785 return self.expression(exp.AlterColumn, this=column, drop=True) 5786 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5787 return self.expression(exp.AlterColumn, this=column, 
default=self._parse_conjunction()) 5788 if self._match(TokenType.COMMENT): 5789 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5790 5791 self._match_text_seq("SET", "DATA") 5792 self._match_text_seq("TYPE") 5793 return self.expression( 5794 exp.AlterColumn, 5795 this=column, 5796 dtype=self._parse_types(), 5797 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5798 using=self._match(TokenType.USING) and self._parse_conjunction(), 5799 ) 5800 5801 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5802 index = self._index - 1 5803 5804 partition_exists = self._parse_exists() 5805 if self._match(TokenType.PARTITION, advance=False): 5806 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5807 5808 self._retreat(index) 5809 return self._parse_csv(self._parse_drop_column) 5810 5811 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5812 if self._match(TokenType.COLUMN): 5813 exists = self._parse_exists() 5814 old_column = self._parse_column() 5815 to = self._match_text_seq("TO") 5816 new_column = self._parse_column() 5817 5818 if old_column is None or to is None or new_column is None: 5819 return None 5820 5821 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5822 5823 self._match_text_seq("TO") 5824 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5825 5826 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5827 start = self._prev 5828 5829 if not self._match(TokenType.TABLE): 5830 return self._parse_as_command(start) 5831 5832 exists = self._parse_exists() 5833 only = self._match_text_seq("ONLY") 5834 this = self._parse_table(schema=True) 5835 5836 if self._next: 5837 self._advance() 5838 5839 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5840 if parser: 5841 actions = ensure_list(parser(self)) 5842 options = 
self._parse_csv(self._parse_property) 5843 5844 if not self._curr and actions: 5845 return self.expression( 5846 exp.AlterTable, 5847 this=this, 5848 exists=exists, 5849 actions=actions, 5850 only=only, 5851 options=options, 5852 ) 5853 5854 return self._parse_as_command(start) 5855 5856 def _parse_merge(self) -> exp.Merge: 5857 self._match(TokenType.INTO) 5858 target = self._parse_table() 5859 5860 if target and self._match(TokenType.ALIAS, advance=False): 5861 target.set("alias", self._parse_table_alias()) 5862 5863 self._match(TokenType.USING) 5864 using = self._parse_table() 5865 5866 self._match(TokenType.ON) 5867 on = self._parse_conjunction() 5868 5869 return self.expression( 5870 exp.Merge, 5871 this=target, 5872 using=using, 5873 on=on, 5874 expressions=self._parse_when_matched(), 5875 ) 5876 5877 def _parse_when_matched(self) -> t.List[exp.When]: 5878 whens = [] 5879 5880 while self._match(TokenType.WHEN): 5881 matched = not self._match(TokenType.NOT) 5882 self._match_text_seq("MATCHED") 5883 source = ( 5884 False 5885 if self._match_text_seq("BY", "TARGET") 5886 else self._match_text_seq("BY", "SOURCE") 5887 ) 5888 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5889 5890 self._match(TokenType.THEN) 5891 5892 if self._match(TokenType.INSERT): 5893 _this = self._parse_star() 5894 if _this: 5895 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5896 else: 5897 then = self.expression( 5898 exp.Insert, 5899 this=self._parse_value(), 5900 expression=self._match_text_seq("VALUES") and self._parse_value(), 5901 ) 5902 elif self._match(TokenType.UPDATE): 5903 expressions = self._parse_star() 5904 if expressions: 5905 then = self.expression(exp.Update, expressions=expressions) 5906 else: 5907 then = self.expression( 5908 exp.Update, 5909 expressions=self._match(TokenType.SET) 5910 and self._parse_csv(self._parse_equality), 5911 ) 5912 elif self._match(TokenType.DELETE): 5913 then = self.expression(exp.Var, 
this=self._prev.text) 5914 else: 5915 then = None 5916 5917 whens.append( 5918 self.expression( 5919 exp.When, 5920 matched=matched, 5921 source=source, 5922 condition=condition, 5923 then=then, 5924 ) 5925 ) 5926 return whens 5927 5928 def _parse_show(self) -> t.Optional[exp.Expression]: 5929 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5930 if parser: 5931 return parser(self) 5932 return self._parse_as_command(self._prev) 5933 5934 def _parse_set_item_assignment( 5935 self, kind: t.Optional[str] = None 5936 ) -> t.Optional[exp.Expression]: 5937 index = self._index 5938 5939 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5940 return self._parse_set_transaction(global_=kind == "GLOBAL") 5941 5942 left = self._parse_primary() or self._parse_id_var() 5943 assignment_delimiter = self._match_texts(("=", "TO")) 5944 5945 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5946 self._retreat(index) 5947 return None 5948 5949 right = self._parse_statement() or self._parse_id_var() 5950 this = self.expression(exp.EQ, this=left, expression=right) 5951 5952 return self.expression(exp.SetItem, this=this, kind=kind) 5953 5954 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5955 self._match_text_seq("TRANSACTION") 5956 characteristics = self._parse_csv( 5957 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5958 ) 5959 return self.expression( 5960 exp.SetItem, 5961 expressions=characteristics, 5962 kind="TRANSACTION", 5963 **{"global": global_}, # type: ignore 5964 ) 5965 5966 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5967 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5968 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5969 5970 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5971 index = self._index 5972 set_ = self.expression( 5973 exp.Set, 
expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5974 ) 5975 5976 if self._curr: 5977 self._retreat(index) 5978 return self._parse_as_command(self._prev) 5979 5980 return set_ 5981 5982 def _parse_var_from_options( 5983 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5984 ) -> t.Optional[exp.Var]: 5985 start = self._curr 5986 if not start: 5987 return None 5988 5989 option = start.text.upper() 5990 continuations = options.get(option) 5991 5992 index = self._index 5993 self._advance() 5994 for keywords in continuations or []: 5995 if isinstance(keywords, str): 5996 keywords = (keywords,) 5997 5998 if self._match_text_seq(*keywords): 5999 option = f"{option} {' '.join(keywords)}" 6000 break 6001 else: 6002 if continuations or continuations is None: 6003 if raise_unmatched: 6004 self.raise_error(f"Unknown option {option}") 6005 6006 self._retreat(index) 6007 return None 6008 6009 return exp.var(option) 6010 6011 def _parse_as_command(self, start: Token) -> exp.Command: 6012 while self._curr: 6013 self._advance() 6014 text = self._find_sql(start, self._prev) 6015 size = len(start.text) 6016 self._warn_unsupported() 6017 return exp.Command(this=text[:size], expression=text[size:]) 6018 6019 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6020 settings = [] 6021 6022 self._match_l_paren() 6023 kind = self._parse_id_var() 6024 6025 if self._match(TokenType.L_PAREN): 6026 while True: 6027 key = self._parse_id_var() 6028 value = self._parse_primary() 6029 6030 if not key and value is None: 6031 break 6032 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6033 self._match(TokenType.R_PAREN) 6034 6035 self._match_r_paren() 6036 6037 return self.expression( 6038 exp.DictProperty, 6039 this=this, 6040 kind=kind.this if kind else None, 6041 settings=settings, 6042 ) 6043 6044 def _parse_dict_range(self, this: str) -> exp.DictRange: 6045 self._match_l_paren() 6046 has_min = self._match_text_seq("MIN") 6047 
if has_min: 6048 min = self._parse_var() or self._parse_primary() 6049 self._match_text_seq("MAX") 6050 max = self._parse_var() or self._parse_primary() 6051 else: 6052 max = self._parse_var() or self._parse_primary() 6053 min = exp.Literal.number(0) 6054 self._match_r_paren() 6055 return self.expression(exp.DictRange, this=this, min=min, max=max) 6056 6057 def _parse_comprehension( 6058 self, this: t.Optional[exp.Expression] 6059 ) -> t.Optional[exp.Comprehension]: 6060 index = self._index 6061 expression = self._parse_column() 6062 if not self._match(TokenType.IN): 6063 self._retreat(index - 1) 6064 return None 6065 iterator = self._parse_column() 6066 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6067 return self.expression( 6068 exp.Comprehension, 6069 this=this, 6070 expression=expression, 6071 iterator=iterator, 6072 condition=condition, 6073 ) 6074 6075 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6076 if self._match(TokenType.HEREDOC_STRING): 6077 return self.expression(exp.Heredoc, this=self._prev.text) 6078 6079 if not self._match_text_seq("$"): 6080 return None 6081 6082 tags = ["$"] 6083 tag_text = None 6084 6085 if self._is_connected(): 6086 self._advance() 6087 tags.append(self._prev.text.upper()) 6088 else: 6089 self.raise_error("No closing $ found") 6090 6091 if tags[-1] != "$": 6092 if self._is_connected() and self._match_text_seq("$"): 6093 tag_text = tags[-1] 6094 tags.append("$") 6095 else: 6096 self.raise_error("No closing $ found") 6097 6098 heredoc_start = self._curr 6099 6100 while self._curr: 6101 if self._match_text_seq(*tags, advance=False): 6102 this = self._find_sql(heredoc_start, self._prev) 6103 self._advance(len(tags)) 6104 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6105 6106 self._advance() 6107 6108 self.raise_error(f"No closing {''.join(tags)} found") 6109 return None 6110 6111 def _find_parser( 6112 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6113 ) -> 
t.Optional[t.Callable]: 6114 if not self._curr: 6115 return None 6116 6117 index = self._index 6118 this = [] 6119 while True: 6120 # The current token might be multiple words 6121 curr = self._curr.text.upper() 6122 key = curr.split(" ") 6123 this.append(curr) 6124 6125 self._advance() 6126 result, trie = in_trie(trie, key) 6127 if result == TrieResult.FAILED: 6128 break 6129 6130 if result == TrieResult.EXISTS: 6131 subparser = parsers[" ".join(this)] 6132 return subparser 6133 6134 self._retreat(index) 6135 return None 6136 6137 def _match(self, token_type, advance=True, expression=None): 6138 if not self._curr: 6139 return None 6140 6141 if self._curr.token_type == token_type: 6142 if advance: 6143 self._advance() 6144 self._add_comments(expression) 6145 return True 6146 6147 return None 6148 6149 def _match_set(self, types, advance=True): 6150 if not self._curr: 6151 return None 6152 6153 if self._curr.token_type in types: 6154 if advance: 6155 self._advance() 6156 return True 6157 6158 return None 6159 6160 def _match_pair(self, token_type_a, token_type_b, advance=True): 6161 if not self._curr or not self._next: 6162 return None 6163 6164 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6165 if advance: 6166 self._advance(2) 6167 return True 6168 6169 return None 6170 6171 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6172 if not self._match(TokenType.L_PAREN, expression=expression): 6173 self.raise_error("Expecting (") 6174 6175 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6176 if not self._match(TokenType.R_PAREN, expression=expression): 6177 self.raise_error("Expecting )") 6178 6179 def _match_texts(self, texts, advance=True): 6180 if self._curr and self._curr.text.upper() in texts: 6181 if advance: 6182 self._advance() 6183 return True 6184 return None 6185 6186 def _match_text_seq(self, *texts, advance=True): 6187 index = self._index 6188 for text in 
texts: 6189 if self._curr and self._curr.text.upper() == text: 6190 self._advance() 6191 else: 6192 self._retreat(index) 6193 return None 6194 6195 if not advance: 6196 self._retreat(index) 6197 6198 return True 6199 6200 def _replace_lambda( 6201 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6202 ) -> t.Optional[exp.Expression]: 6203 if not node: 6204 return node 6205 6206 for column in node.find_all(exp.Column): 6207 if column.parts[0].name in lambda_variables: 6208 dot_or_id = column.to_dot() if column.table else column.this 6209 parent = column.parent 6210 6211 while isinstance(parent, exp.Dot): 6212 if not isinstance(parent.parent, exp.Dot): 6213 parent.replace(dot_or_id) 6214 break 6215 parent = parent.parent 6216 else: 6217 if column is node: 6218 node = dot_or_id 6219 else: 6220 column.replace(dot_or_id) 6221 return node 6222 6223 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6224 start = self._prev 6225 6226 # Not to be confused with TRUNCATE(number, decimals) function call 6227 if self._match(TokenType.L_PAREN): 6228 self._retreat(self._index - 2) 6229 return self._parse_function() 6230 6231 # Clickhouse supports TRUNCATE DATABASE as well 6232 is_database = self._match(TokenType.DATABASE) 6233 6234 self._match(TokenType.TABLE) 6235 6236 exists = self._parse_exists(not_=False) 6237 6238 expressions = self._parse_csv( 6239 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6240 ) 6241 6242 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6243 6244 if self._match_text_seq("RESTART", "IDENTITY"): 6245 identity = "RESTART" 6246 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6247 identity = "CONTINUE" 6248 else: 6249 identity = None 6250 6251 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6252 option = self._prev.text 6253 else: 6254 option = None 6255 6256 partition = self._parse_partition() 6257 6258 # Fallback case 6259 if 
self._curr: 6260 return self._parse_as_command(start) 6261 6262 return self.expression( 6263 exp.TruncateTable, 6264 expressions=expressions, 6265 is_database=is_database, 6266 exists=exists, 6267 cluster=cluster, 6268 identity=identity, 6269 option=option, 6270 partition=partition, 6271 ) 6272 6273 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6274 this = self._parse_ordered(self._parse_opclass) 6275 6276 if not self._match(TokenType.WITH): 6277 return this 6278 6279 op = self._parse_var(any_token=True) 6280 6281 return self.expression(exp.WithOperator, this=this, op=op)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: The desired error level. Default: ErrorLevel.IMMEDIATE.
            error_message_context: The amount of context to capture from a query
                string when displaying the error message (in number of characters).
                Default: 100.
            max_errors: Maximum number of error messages to include in a raised
                ParseError. Only relevant if error_level is ErrorLevel.RAISE.
                Default: 3.
            dialect: The dialect to parse with, resolved via Dialect.get_or_raise.
        """
        # Imported locally, presumably to avoid a circular import at module
        # load time — confirm before moving to the top of the file.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
1160 def parse( 1161 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1162 ) -> t.List[t.Optional[exp.Expression]]: 1163 """ 1164 Parses a list of tokens and returns a list of syntax trees, one tree 1165 per parsed SQL statement. 1166 1167 Args: 1168 raw_tokens: The list of tokens. 1169 sql: The original SQL string, used to produce helpful debug messages. 1170 1171 Returns: 1172 The list of the produced syntax trees. 1173 """ 1174 return self._parse( 1175 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1176 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1178 def parse_into( 1179 self, 1180 expression_types: exp.IntoType, 1181 raw_tokens: t.List[Token], 1182 sql: t.Optional[str] = None, 1183 ) -> t.List[t.Optional[exp.Expression]]: 1184 """ 1185 Parses a list of tokens into a given Expression type. If a collection of Expression 1186 types is given instead, this method will try to parse the token list into each one 1187 of them, stopping at the first for which the parsing succeeds. 1188 1189 Args: 1190 expression_types: The expression type(s) to try and parse the token list into. 1191 raw_tokens: The list of tokens. 1192 sql: The original SQL string, used to produce helpful debug messages. 1193 1194 Returns: 1195 The target Expression. 1196 """ 1197 errors = [] 1198 for expression_type in ensure_list(expression_types): 1199 parser = self.EXPRESSION_PARSERS.get(expression_type) 1200 if not parser: 1201 raise TypeError(f"No parser registered for {expression_type}") 1202 1203 try: 1204 return self._parse(parser, raw_tokens, sql) 1205 except ParseError as e: 1206 e.errors[0]["into_expression"] = expression_type 1207 errors.append(e) 1208 1209 raise ParseError( 1210 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1211 errors=merge_errors(errors), 1212 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1249 def check_errors(self) -> None: 1250 """Logs or raises any found errors, depending on the chosen error level setting.""" 1251 if self.error_level == ErrorLevel.WARN: 1252 for error in self.errors: 1253 logger.error(str(error)) 1254 elif self.error_level == ErrorLevel.RAISE and self.errors: 1255 raise ParseError( 1256 concat_messages(self.errors, self.max_errors), 1257 errors=merge_errors(self.errors), 1258 )
Logs or raises any found errors, depending on the chosen error level setting.
1260 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1261 """ 1262 Appends an error in the list of recorded errors or raises it, depending on the chosen 1263 error level setting. 1264 """ 1265 token = token or self._curr or self._prev or Token.string("") 1266 start = token.start 1267 end = token.end + 1 1268 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1269 highlight = self.sql[start:end] 1270 end_context = self.sql[end : end + self.error_message_context] 1271 1272 error = ParseError.new( 1273 f"{message}. Line {token.line}, Col: {token.col}.\n" 1274 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1275 description=message, 1276 line=token.line, 1277 col=token.col, 1278 start_context=start_context, 1279 highlight=highlight, 1280 end_context=end_context, 1281 ) 1282 1283 if self.error_level == ErrorLevel.IMMEDIATE: 1284 raise error 1285 1286 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1288 def expression( 1289 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1290 ) -> E: 1291 """ 1292 Creates a new, validated Expression. 1293 1294 Args: 1295 exp_class: The expression class to instantiate. 1296 comments: An optional list of comments to attach to the expression. 1297 kwargs: The arguments to set for the expression along with their respective values. 1298 1299 Returns: 1300 The target expression. 1301 """ 1302 instance = exp_class(**kwargs) 1303 instance.add_comments(comments) if comments else self._add_comments(instance) 1304 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1311 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1312 """ 1313 Validates an Expression, making sure that all its mandatory arguments are set. 1314 1315 Args: 1316 expression: The expression to validate. 1317 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1318 1319 Returns: 1320 The validated expression. 1321 """ 1322 if self.error_level != ErrorLevel.IGNORE: 1323 for error_message in expression.error_messages(args): 1324 self.raise_error(error_message) 1325 1326 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.